Code
load(analysis_file_wide)
load(analysis_file_wide2)
load(rates_file1)
load(rates_file2)
load(rates_file3)
load(standard_file)
load(popsum_file)
load(population_file1)
load(population_file2)This document outlines of the second publication.
This is an interactive document. You can use the navigation pane on the right (“Table of contents”) to jump between sections. Subsections in the navigation open by clicking on the main section.
wide_spc_methods (Individual level cancer data (ZfKD + SEER data) in wide format):
H:/Documents/Projects/SPN Data Analysis/Publications/pub_spc_sirmethods_bmed/1_input/81.spn.dataset.methods.wide.RData (last modified: 2023-07-29 01:25:17.633593)
dependent on scripts:
01.cr_read.seer.dataset.R
03.cr_dm.seer.dataset.R
04.cr_save.seer.analysis.dataset.R
11.cr_read.zfkd.dataset.R
13.cr_dm.zfkd.dataset.R
14.cr_save.zfkd.analysis.dataset.R
81.01.cr_prefilter.methods.zfkd.R
81.02.cr_prefilter.methods.seer.R
81.03.cr_dm.save.methods.R
wide_spc_methods_iarc (Individual level cancer data (ZfKD + SEER data) in wide format, only counting international primaries):
H:/Documents/Projects/SPN Data Analysis/Publications/pub_spc_sirmethods_bmed/1_input/83.spn.dataset.methods.iarc.wide.RData (last modified: 2023-07-29 01:42:26.036531)
dependent on scripts:
01.cr_read.seer.dataset.R
03.cr_dm.seer.dataset.R
04.cr_save.seer.analysis.dataset.R
11.cr_read.zfkd.dataset.R
13.cr_dm.zfkd.dataset.R
14.cr_save.zfkd.analysis.dataset.R
81.11.cr_prefilter.methods.iarc.zfkd.R
81.12.cr_prefilter.methods.iarc.seer.R
81.13.cr_dm.save.methods.R
refrates_lungcancer_dco_calc (File with reference incidence rates for lung cancer, including DCO cases, calculated from registry data)
H:/Documents/Projects/SPN Data Analysis/Publications/pub_spc_sirmethods_bmed/1_input/58.refrates.methods.lungcancer.dco.calculated.RData (last modified: 2023-07-29 21:19:03.574644
dependent on scripts:
57.cr_read.refrates.us.dco.R
83.05.cr_refrates.from.cohort.zfkd.dco.R
83.07.cr_refrates.merge.methods.R
refrates_methods_lcsubtype_histgroupiarc_dco (File with reference incidence rates for subtypes of lung cancer, based on t_histgroupiarc, including DCO cases, calculated from registry data)
H:/Documents/Projects/SPN Data Analysis/Publications/pub_spc_sirmethods_bmed/1_input/82.02.rates.lc.subtype.histgroupiarc.methods.dco.RData (last modified: 2023-07-29 01:50:14.752512)
dependent on scripts:
82.21.cr_refrates.by.lcsubtype.histgroupiarc.seer.R
82.22.cr_refrates.by.lcsubtype.histgroupiarc.zfkd.R
82.23.cr_refrates.by.lcsubtype.histgroupiarc.methods.R
refrates_methods_lcsubtype_histgroupiarc_iarc_dco (File with reference incidence rates for subtypes of lung cancer, based on t_histgroupiarc, including DCO cases, calculated from registry data; only counting cases that fulfill IARC/IACR MP Rules)
H:/Documents/Projects/SPN Data Analysis/Publications/pub_spc_sirmethods_bmed/1_input/82.03.rates.lc.subtype.histgroupiarc.methods.iarc.dco.RData (last modified: 2023-07-29 01:53:03.367453)
dependent on scripts:
82.31.cr_refrates.by.lcsubtype.histgroupiarc.iarc.seer.R
82.22.cr_refrates.by.lcsubtype.zfkd.R
82.33.cr_refrates.by.lcsubtype.histgroupiarc.methods.iarc.R
load(analysis_file_wide)
load(analysis_file_wide2)
load(rates_file1)
load(rates_file2)
load(rates_file3)
load(standard_file)
load(popsum_file)
load(population_file1)
load(population_file2)We will only use the sub-region “DEA3 Muenster” instead of all “DEA Northrhine-Westphalia”.
wide_spc_methods <- wide_spc_methods %>%
mutate(p_region.1 = case_when(NUTS_2_Code.1 == "DEA3" ~ "DEA3 Muenster",
.default = p_region.1),
p_region.2 = case_when(NUTS_2_Code.2 == "DEA3" ~ "DEA3 Muenster",
.default = p_region.2))wide_spc_methods_iarc <- wide_spc_methods_iarc %>%
mutate(p_region.1 = case_when(NUTS_2_Code.1 == "DEA3" ~ "DEA3 Muenster",
.default = p_region.1),
p_region.2 = case_when(NUTS_2_Code.2 == "DEA3" ~ "DEA3 Muenster",
.default = p_region.2))wide_spc_methods <- wide_spc_methods %>%
mutate(t_lung.1 = case_when(t_sitewhogen.1 == "Lung and Bronchus" ~ 1,
is.na(t_sitewhogen.1) ~ 0,
.default = 0),
t_lung.2 = case_when(t_sitewhogen.2 == "Lung and Bronchus" ~ 1,
is.na(t_sitewhogen.2) ~ 0,
.default = 0))wide_spc_methods_iarc <- wide_spc_methods_iarc %>%
mutate(t_lung.1 = case_when(t_sitewhogen.1 == "Lung and Bronchus" ~ 1,
is.na(t_sitewhogen.1) ~ 0,
TRUE ~ 0),
t_lung.2 = case_when(t_sitewhogen.2 == "Lung and Bronchus" ~ 1,
is.na(t_sitewhogen.2) ~ 0,
TRUE ~ 0))wide_spc_methods <- wide_spc_methods %>%
mutate(
t_lungiarc.1 = case_when(
t_sitewhogen.1 == "Lung and Bronchus" & reg.1 == "zfkd" ~ 1,
t_sitewhogen.1 == "Lung and Bronchus" & reg.1 == "seer" & INTPRIM.1 == 1 ~ 1,
is.na(t_sitewhogen.1) ~ 0,
.default = 0),
t_lungiarc.2 = case_when(
t_sitewhogen.2 == "Lung and Bronchus" & reg.1 == "zfkd" ~ 1,
t_sitewhogen.2 == "Lung and Bronchus" & reg.1 == "seer" & INTPRIM.2 == 1 ~ 1,
is.na(t_sitewhogen.2) ~ 0,
.default = 0)
)wide_spc_methods_iarc <- wide_spc_methods_iarc %>%
mutate(
t_lungiarc.1 = case_when(
t_sitewhogen.1 == "Lung and Bronchus" & reg.1 == "zfkd" ~ 1,
t_sitewhogen.1 == "Lung and Bronchus" & reg.1 == "seer" & INTPRIM.1 == 1 ~ 1,
is.na(t_sitewhogen.1) ~ 0,
.default = 0),
t_lungiarc.2 = case_when(
t_sitewhogen.2 == "Lung and Bronchus" & reg.1 == "zfkd" ~ 1,
t_sitewhogen.2 == "Lung and Bronchus" & reg.1 == "seer" & INTPRIM.2 == 1 ~ 1,
is.na(t_sitewhogen.2) ~ 0,
.default = 0)
)
wide_spc_methods_iarc %>%
count(t_lung.1, t_lungiarc.1, t_lung.2, t_lungiarc.2, reg.1)wide_spc_methods <- wide_spc_methods %>%
dplyr::mutate(p_agefcgroup = case_when(
t_agediag.1 < 30 ~ 10,
t_agediag.1 >= 30 & t_agediag.1 < 50 ~ 11,
t_agediag.1 >= 50 & t_agediag.1 < 60 ~ 12,
t_agediag.1 >= 60 & t_agediag.1 < 70 ~ 13,
t_agediag.1 >= 70 & t_agediag.1 < 80 ~ 14,
t_agediag.1 >= 80 ~ 15,
.default = NA_real_)) %>%
sjlabelled::var_labels(p_agefcgroup="Age at diagnosis of first cancer [grouped]") %>%
sjlabelled::set_labels(p_agefcgroup, labels = c("<30 " = 10,
"30 - 49" = 11,
"50 - 59" = 12,
"60 - 69" = 13,
"70 - 79" = 14,
"80+" = 15)) %>%
mutate(dplyr::across(.cols = p_agefcgroup, .fns = ~ sjlabelled::as_label(.x , keep.labels=TRUE))) wide_spc_methods_iarc <- wide_spc_methods_iarc %>%
dplyr::mutate(p_agefcgroup = case_when(
t_agediag.1 < 30 ~ 10,
t_agediag.1 >= 30 & t_agediag.1 < 50 ~ 11,
t_agediag.1 >= 50 & t_agediag.1 < 60 ~ 12,
t_agediag.1 >= 60 & t_agediag.1 < 70 ~ 13,
t_agediag.1 >= 70 & t_agediag.1 < 80 ~ 14,
t_agediag.1 >= 80 ~ 15,
.default = NA_real_)) %>%
sjlabelled::var_labels(p_agefcgroup="Age at diagnosis of first cancer [grouped]") %>%
sjlabelled::set_labels(p_agefcgroup, labels = c("<30 " = 10,
"30 - 49" = 11,
"50 - 59" = 12,
"60 - 69" = 13,
"70 - 79" = 14,
"80+" = 15)) %>%
mutate(dplyr::across(.cols = p_agefcgroup, .fns = ~ sjlabelled::as_label(.x , keep.labels=TRUE))) wide_spc_methods <- wide_spc_methods %>%
mutate(
p_yearfcgroup = case_when(
t_singleyeardiag.1 >= 2002 & t_singleyeardiag.1 < 2006 ~ 7,
t_singleyeardiag.1 >= 2006 & t_singleyeardiag.1 < 2010 ~ 8,
t_singleyeardiag.1 >= 2010 & t_singleyeardiag.1 < 2014 ~ 9,
.default = NA_real_)) %>%
sjlabelled::var_labels(p_yearfcgroup = "Time period of diagnosis of first cancer") %>%
sjlabelled::set_labels(p_yearfcgroup, labels = c("2002 - 2005" = 7,
"2006 - 2009" = 8,
"2010 - 2013" = 9)) %>%
mutate(dplyr::across(.cols = p_yearfcgroup, .fns = ~ sjlabelled::as_label(.x , keep.labels=TRUE))) wide_spc_methods_iarc <- wide_spc_methods_iarc %>%
mutate(
p_yearfcgroup = case_when(
t_singleyeardiag.1 >= 2002 & t_singleyeardiag.1 < 2006 ~ 7,
t_singleyeardiag.1 >= 2006 & t_singleyeardiag.1 < 2010 ~ 8,
t_singleyeardiag.1 >= 2010 & t_singleyeardiag.1 < 2014 ~ 9,
.default = NA_real_)) %>%
sjlabelled::var_labels(p_yearfcgroup = "Time period of diagnosis of first cancer") %>%
sjlabelled::set_labels(p_yearfcgroup, labels = c("2002 - 2005" = 7,
"2006 - 2009" = 8,
"2010 - 2013" = 9)) %>%
mutate(dplyr::across(.cols = p_yearfcgroup, .fns = ~ sjlabelled::as_label(.x , keep.labels=TRUE))) wide_spc_methods <- wide_spc_methods %>%
mutate(
p_statuseventlc = case_when(
p_status.1 %in% c("Patient alive after SPC", "Patient dead after SPC") & t_lung.2 == 1 ~ 110, # -> SPLC developed
p_status.1 %in% c("Patient alive after SPC", "Patient dead after SPC") ~ 120, #alive or dead with SPC developed - > other SPC developed
p_status.1 == "Patient dead after FC" ~ 200, #dead after FC -> dead after LC
p_status.1 == "Patient alive after FC (with or without following SPC after end of FU)" ~ 300, #alive after FC --> no event until end of FU
p_status.1 == "NA - Patient not born before end of FU " ~ 999,
p_status.1 == "NA - Patient did not develop cancer before end of FU" ~ 999,
.default = NA_real_)) %>%
sjlabelled::var_labels(p_statuseventlc = "Patient status (events: SPC developed, dead after LC, no event until end of FU)") %>%
sjlabelled::set_labels(p_statuseventlc, labels = c(
"SPLC developed" = 110,
"other SPC developed" = 120,
"dead after LC" = 200,
"no event until end of follow-up" = 300,
"Error - check this" = 999)) %>%
mutate(dplyr::across(.cols = p_statuseventlc, .fns = ~ sjlabelled::as_label(.x , keep.labels=TRUE))) wide_spc_methods_iarc <- wide_spc_methods_iarc %>%
mutate(
p_statuseventlc = case_when(
p_status.1 %in% c("Patient alive after SPC", "Patient dead after SPC") & t_lung.2 == 1 ~ 110, # -> SPLC developed
p_status.1 %in% c("Patient alive after SPC", "Patient dead after SPC") ~ 120, #alive or dead with SPC developed - > other SPC developed
p_status.1 == "Patient dead after FC" ~ 200, #dead after FC -> dead after LC
p_status.1 == "Patient alive after FC (with or without following SPC after end of FU)" ~ 300, #alive after FC --> no event until end of FU
p_status.1 == "NA - Patient not born before end of FU " ~ 999,
p_status.1 == "NA - Patient did not develop cancer before end of FU" ~ 999,
.default = NA_real_)) %>%
sjlabelled::var_labels(p_statuseventlc = "Patient status (events: SPC developed, dead after LC, no event until end of FU)") %>%
sjlabelled::set_labels(p_statuseventlc, labels = c(
"SPLC developed" = 110,
"other SPC developed" = 120,
"dead after LC" = 200,
"no event until end of follow-up" = 300,
"Error - check this" = 999)) %>%
mutate(dplyr::across(.cols = p_statuseventlc, .fns = ~ sjlabelled::as_label(.x , keep.labels=TRUE))) wide_spc_methods %>%
#exclude unusual and Sarcoma according to IARC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual"))) %>%
#analyze remaining unusual according to Barclay et al.
filter(t_sublung.1 %in% c("Excluded - unusual", "Excluded - benign")) %>%
count(t_hist.1, t_histgroupiarc.1) %>%
arrange(desc(n))res_n_excluded_dis1 <- wide_spc_methods %>%
#exclude unusual and Sarcoma according to IARC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual"))) %>%
#analyze remaining unusual according to Barclay et al.
filter(t_sublung.1 %in% c("Excluded - unusual", "Excluded - benign")) %>%
nrow()There are 1092 cases where exclusion of unusual in t_sublungiarc would not result in exclusion by Barclay et al. This difference is small and can be ignored.
wide_spc_methods %>%
#exclude unusual and Sarcoma according to IARC
filter(t_sublungiarc.1 %in% c("Unusual", "Excluded")) %>%
#analyze remaining unusual according to Barclay et al.
filter(!(t_sublung.1 %in% c("Excluded - unusual", "Excluded - benign"))) %>%
count(t_hist.1, t_histgroupiarc.1) %>%
arrange(desc(n))res_n_excluded_dis2 <- wide_spc_methods %>%
#analyze remaining unusual according to Barclay et al.
filter(!(t_sublung.1 %in% c("Excluded - unusual", "Excluded - benign"))) %>%
#exclude unusual and Sarcoma according to IARC
filter(t_sublungiarc.1 %in% c("Unusual", "Excluded")) %>%
nrow()There are 160 cases where exclusion of unusual exclusion by Barclay et al. would not result in exclusion by t_sublungiarc. This difference is small and can be ignored.
d0_lung_wide_raw <- wide_spc_methods %>%
#S0: only keep primary LC
tidylog::filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
tidylog::filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisisana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
tidylog::filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
))filter: removed 6,523,936 rows (79%), 1,785,087 rows remaining
filter: removed 776,173 rows (43%), 1,008,914 rows remaining
filter: removed 260,640 rows (26%), 748,274 rows remaining
# same inclusion criteria as for Eberl et al 2022
# but exclusion of lung subtype exclusion based on both t_sublungiarc and t_sublung and we keep sarcoma
# additionally SPLC cannot be benign or unsual
d1_lung_wide <- wide_spc_methods %>%
#S0: only keep primary LC
tidylog::filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
tidylog::filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
tidylog::filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
tidylog::filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
tidylog::filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
# S5: delete DCO at first LC
tidylog::filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
# S6: minimum follow-up without SPC, death or end of FU >= 6 months (0.5 years) after first lung cancer diagnosis
tidylog::filter(p_futimeyrs.1 >= 0.5) %>%
# S7: exclusion of unusual histology of SPLC
tidylog::filter(!(t_sublungiarc.2 %in% c("Excluded", "Unusual")) &
!(t_sublung.2 %in% c("Excluded - unusual")))filter: removed 6,523,936 rows (79%), 1,785,087 rows remaining
filter: removed 776,173 rows (43%), 1,008,914 rows remaining
filter: removed 260,640 rows (26%), 748,274 rows remaining
filter: removed 1,239 rows (<1%), 747,035 rows remaining
filter: removed 472 rows (<1%), 746,563 rows remaining
filter: removed 40,687 rows (5%), 705,876 rows remaining
filter: removed 306,478 rows (43%), 399,398 rows remaining
filter: removed 4 rows (<1%), 399,394 rows remaining
#test that selecting based on t_siteicdocat and t_sitewhogen gives the same result
testthat::expect_equal(
wide_spc_methods %>% filter(t_siteicdocat.1 %in% c("C34")) %>% nrow(),
wide_spc_methods %>% filter(t_sitewhogen.1 %in% c("Lung and Bronchus")) %>% nrow()
)d1_lung_wide %>%
mutate(p_status.1 = str_trunc(as.character(p_status.1), 30)) %>%
count(p_status.1, p_statuseventlc) p_status.1 p_statuseventlc n
1 Patient alive after FC (wit... no event until end of follow-up 94122
2 Patient alive after SPC SPLC developed 2996
3 Patient alive after SPC other SPC developed 4912
4 Patient dead after FC dead after LC 284076
5 Patient dead after SPC SPLC developed 4423
6 Patient dead after SPC other SPC developed 8865
#test that no patient status is unknown
testthat::expect_equal(
d1_lung_wide %>% filter(
!p_status.1 %in% c("Patient alive after FC (with or without following SPC after end of FU)", "Patient alive after SPC", "Patient dead after FC", "Patient dead after SPC")) %>% nrow(),
0
)#test that no missings in t_sublungiarc.1
testthat::expect_equal(
d1_lung_wide %>% filter(is.na(t_sublungiarc.1)) %>% nrow(),
0
)
# #in case test fails, you can identify the problems
# d1_lung_wide %>%
# filter(is.na(t_sublungiarc.1))%>%
# count(reg.1, t_histgroupseer.1, t_hist.1)
#test that no missings in t_sublungiarc.2
testthat::expect_equal(
d1_lung_wide %>% filter(t_sitewhogen.2 == "Lung and Bronchus" & is.na(t_sublungiarc.2)) %>% nrow(),
0
)
d1_lung_wide %>%
filter(t_sitewhogen.2 == "Lung and Bronchus")%>%
count(t_sublungiarc.2) t_sublungiarc.2 n
1 Squamous cell carcinoma 2180
2 Adenocarcinoma 4286
3 Small cell carcinoma 869
4 Large cell carcinoma 379
5 Other specified carcinoma (incl Carcinoid) 1150
6 Sarcoma 11
7 Other specified malignant neoplasm 1
8 Unspecified 343
d1_lung_wide %>%
filter(t_sitewhogen.2 == "Lung and Bronchus")%>%
count(t_sublungiarcgroup.2) t_sublungiarcgroup.2 n
1 Squamous cell carcinoma (SCC) 2180
2 Adenocarcinoma (AC) 4286
3 Small cell carcinoma (SCLC) 869
4 Large cell carcinoma (LCC) 379
5 Other & unspecified (O&U) 1505
#show which cases have not developed SPC before end of FU
d1_lung_wide %>%
dplyr::count(p_spc.1, p_status.1)#make detailed breakdown by t_datediag.2
d1_lung_wide %>%
dplyr::filter(sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 & p_spc.1 == "SPC developed") %>%
dplyr::count(p_spc.1, p_status.1, t_datediag.2)#prepare a check
n_patstatus_reset <- d1_lung_wide %>%
dplyr::filter((
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 & p_spc.1 == "SPC developed") |
(sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 & p_spc.1 == "SPC developed")) %>%
nrow()
n_na_before <- d1_lung_wide %>%
dplyr::filter(is.na(t_datediag.2)) %>%
nrow()
#reset all diagnosis for second cancer
d1_lung_wide <- d1_lung_wide %>%
#make tibble to avoid errors
tibble::as_tibble() %>%
#set all cols to missing for pat_status 1(alive, no SPC yet); 3(dead, no SPC)
dplyr::mutate(across(
.cols = where(~ is.double(.x) && !lubridate::is.Date(.x)) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ NA_real_,
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ NA_real_,
TRUE ~ .x))) %>%
#integer vars
dplyr::mutate(across(
.cols = where(is.integer) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ NA_integer_,
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ NA_integer_,
TRUE ~ .x))) %>%
#date vars
dplyr::mutate(across(
.cols = where(lubridate::is.Date) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ as.Date(NA),
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ as.Date(NA),
TRUE ~ .x))) %>%
#character vars
dplyr::mutate(across(
.cols = where(is.character) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ NA_character_,
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ NA_character_,
TRUE ~ .x))) %>%
#factor vars
dplyr::mutate(across(
.cols = where(is.factor) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ NA_character_,
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ NA_character_,
TRUE ~ as.character(.x)))) %>%
#spc var
dplyr::mutate(p_spc = case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ "No SPC",
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ "No SPC",
TRUE ~ as.character(p_spc.1)))
n_na_after <- d1_lung_wide %>%
dplyr::filter(is.na(t_datediag.2)) %>%
nrow()
#test that no more variables are set to NA than expected
testthat::expect_equal(n_patstatus_reset, (n_na_after - n_na_before))
#show that status and p_spc are coherent
d1_lung_wide %>%
dplyr::count(p_spc, p_status.1)rm(n_patstatus_reset, n_na_after, n_na_before)d1_lung_wide <- d1_lung_wide %>%
mutate(t_lung.1 = case_when(t_sitewhogen.1 == "Lung and Bronchus" ~ 1,
is.na(t_sitewhogen.1) ~ 0,
TRUE ~ 0),
t_lung.2 = case_when(t_sitewhogen.2 == "Lung and Bronchus" ~ 1,
is.na(t_sitewhogen.2) ~ 0,
TRUE ~ 0))d1_lung_wide <- d1_lung_wide %>%
mutate(t_lungiarc.1 = case_when(t_sitewhogen.1 == "Lung and Bronchus" & reg.1 == "zfkd" ~ 1,
t_sitewhogen.1 == "Lung and Bronchus" & reg.1 == "seer" & INTPRIM.1 == 1 ~ 1,
is.na(t_sitewhogen.1) ~ 0,
TRUE ~ 0),
t_lungiarc.2 = case_when(t_sitewhogen.2 == "Lung and Bronchus" & reg.1 == "zfkd" ~ 1,
t_sitewhogen.2 == "Lung and Bronchus" & reg.1 == "seer" & INTPRIM.2 == 1 ~ 1,
is.na(t_sitewhogen.2) ~ 0,
TRUE ~ 0)
)
d1_lung_wide %>%
count(t_lung.1, t_lungiarc.1, t_lung.2, t_lungiarc.2, reg.1)# same inclusion criteria as for Eberl et al 2022
# but exclusion of lung subtype exclusion based on t_sublungiarc instead of t_sublung and we keep sarcoma
# additionally SPLC cannot be benign or unsual
# additionally only counting INTPRIM (according to IARC MP rules)
d2_lung_wide_iarc <- wide_spc_methods_iarc %>%
#S0: only keep primary LC
tidylog::filter(t_lungiarc.1 == 1) %>% #i.e. t_sitewhogen == "Lung and Bronchus" and INTPRIM==1 for SEER data to only count primaries according to international rules
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
tidylog::filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisisana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
tidylog::filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
tidylog::filter((t_agediag.1 >= 30 & t_agediag.1 < 100)) %>%
# S4: exclusion of unusual histology of first LC
tidylog::filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
# S5: delete DCO at first LC
tidylog::filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
# S6: minimum follow-up without SPC, death or end of FU >= 6 months (0.5 years)after first lung cancer diagnosis -> this results in more cases remaining than for d1, because less SPC are considered and therefore longer "survival" is achieved
tidylog::filter(p_futimeyrs.1 >= 0.5) %>%
# S7: exclusion of unusual histology of SPLC
tidylog::filter(!(t_sublungiarc.2 %in% c("Excluded", "Unusual")) &
!(t_sublung.2 %in% c("Excluded - unusual")))filter: removed 6,507,559 rows (78%), 1,784,122 rows remaining
filter: removed 775,773 rows (43%), 1,008,349 rows remaining
filter: removed 260,577 rows (26%), 747,772 rows remaining
filter: removed 1,239 rows (<1%), 746,533 rows remaining
filter: removed 472 rows (<1%), 746,061 rows remaining
filter: removed 40,689 rows (5%), 705,372 rows remaining
filter: removed 304,360 rows (43%), 401,012 rows remaining
filter: removed 3 rows (<1%), 401,009 rows remaining
#test that selecting based on t_siteicdocat and t_sitewhogen gives the same result
testthat::expect_equal(
wide_spc_methods_iarc %>% filter(t_siteicdocat.1 %in% c("C34")) %>% nrow(),
wide_spc_methods_iarc %>% filter(t_sitewhogen.1 %in% c("Lung and Bronchus")) %>% nrow()
)d2_lung_wide_iarc %>%
count(p_status.1) p_status.1 n
1 Patient alive after FC (with or without following SPC after end of FU) 96276
2 Patient alive after SPC 6422
3 Patient dead after FC 287156
4 Patient dead after SPC 11155
#test that no patient status is unknown
testthat::expect_equal(
d2_lung_wide_iarc %>% filter(
!p_status.1 %in% c("Patient alive after FC (with or without following SPC after end of FU)", "Patient alive after SPC", "Patient dead after FC", "Patient dead after SPC")) %>% nrow(),
0
)
testthat::expect_equal(
d2_lung_wide_iarc %>% filter(
!p_status.1 %in% c("Patient alive after FC (with or without following SPC after end of FU)", "Patient alive after SPC", "Patient dead after FC", "Patient dead after SPC")) %>% nrow(),
0
)#test that no missings in t_sublung.1
testthat::expect_equal(
d2_lung_wide_iarc %>% filter(is.na(t_sublungiarc.1)) %>% nrow(),
0
)
#test that no missings in t_sublung.2
testthat::expect_equal(
d2_lung_wide_iarc %>% filter(t_sitewhogen.2 == "Lung and Bronchus" & is.na(t_sublungiarc.2)) %>% nrow(),
0
)
d2_lung_wide_iarc %>%
filter(t_sitewhogen.2 == "Lung and Bronchus")%>%
count(t_sublungiarcgroup.2) t_sublungiarcgroup.2 n
1 Squamous cell carcinoma (SCC) 1024
2 Adenocarcinoma (AC) 2021
3 Small cell carcinoma (SCLC) 637
4 Large cell carcinoma (LCC) 5
5 Other & unspecified (O&U) 729
#show which cases have not developed SPC before end of FU
d2_lung_wide_iarc %>%
dplyr::count(p_spc.1, p_status.1)#make detailed breakdown by t_datediag.2
d2_lung_wide_iarc %>%
dplyr::filter(sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 & p_spc.1 == "SPC developed") %>%
dplyr::count(p_spc.1, p_status.1, t_datediag.2)#prepare a check
n_patstatus_reset <- d2_lung_wide_iarc %>%
dplyr::filter((
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 & p_spc.1 == "SPC developed") |
(sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 & p_spc.1 == "SPC developed")) %>%
nrow()
n_na_before <- d2_lung_wide_iarc %>%
dplyr::filter(is.na(t_datediag.2)) %>%
nrow()
#reset all diagnosis for second cancer
d2_lung_wide_iarc <- d2_lung_wide_iarc %>%
#make tibble to avoid errors
tibble::as_tibble() %>%
#set all cols to missing for pat_status 1(alive, no SPC yet); 3(dead, no SPC)
dplyr::mutate(across(
.cols = where(~ is.double(.x) && !lubridate::is.Date(.x)) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ NA_real_,
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ NA_real_,
TRUE ~ .x))) %>%
#integer vars
dplyr::mutate(across(
.cols = where(is.integer) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ NA_integer_,
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ NA_integer_,
TRUE ~ .x))) %>%
#date vars
dplyr::mutate(across(
.cols = where(lubridate::is.Date) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ as.Date(NA),
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ as.Date(NA),
TRUE ~ .x))) %>%
#character vars
dplyr::mutate(across(
.cols = where(is.character) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ NA_character_,
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ NA_character_,
TRUE ~ .x))) %>%
#factor vars
dplyr::mutate(across(
.cols = where(is.factor) & ends_with(".2"),
.fns = ~case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ NA_character_,
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ NA_character_,
TRUE ~ as.character(.x)))) %>%
#spc var
dplyr::mutate(p_spc = case_when(
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 1 ~ "No SPC",
sjlabelled::as_numeric(.data$p_status.1, use.labels = TRUE) == 3 ~ "No SPC",
TRUE ~ as.character(p_spc.1)))
n_na_after <- d2_lung_wide_iarc %>%
dplyr::filter(is.na(t_datediag.2)) %>%
nrow()
#test that no more variables are set to NA than expected
testthat::expect_equal(n_patstatus_reset, (n_na_after - n_na_before))
#show that status and p_spc are coherent
d2_lung_wide_iarc %>%
dplyr::count(p_spc, p_status.1)rm(n_patstatus_reset, n_na_after, n_na_before)d2_lung_wide_iarc <- d2_lung_wide_iarc %>%
mutate(t_lung.1 = case_when(t_sitewhogen.1 == "Lung and Bronchus" ~ 1,
is.na(t_sitewhogen.1) ~ 0,
TRUE ~ 0),
t_lung.2 = case_when(t_sitewhogen.2 == "Lung and Bronchus" ~ 1,
is.na(t_sitewhogen.2) ~ 0,
TRUE ~ 0))d2_lung_wide_iarc <- d2_lung_wide_iarc %>%
mutate(t_lungiarc.1 = case_when(t_sitewhogen.1 == "Lung and Bronchus" & reg.1 == "zfkd" ~ 1,
t_sitewhogen.1 == "Lung and Bronchus" & reg.1 == "seer" & INTPRIM.1 == 1 ~ 1,
is.na(t_sitewhogen.1) ~ 0,
TRUE ~ 0),
t_lungiarc.2 = case_when(t_sitewhogen.2 == "Lung and Bronchus" & reg.1 == "zfkd" ~ 1,
t_sitewhogen.2 == "Lung and Bronchus" & reg.1 == "seer" & INTPRIM.2 == 1 ~ 1,
is.na(t_sitewhogen.2) ~ 0,
TRUE ~ 0)
)
d2_lung_wide_iarc %>%
count(t_lung.1, t_lungiarc.1, t_lung.2, t_lungiarc.2, reg.1)tmp_pids <- wide_spc_methods %>%
tidytable::bind_rows(wide_spc_methods_iarc) %>%
tidytable::distinct(p_id) %>%
tidytable::mutate(inclusion = tidytable::case_when(
p_id %in% d1_lung_wide$p_id & p_id %in% d2_lung_wide_iarc$p_id~ "both",
p_id %in% d1_lung_wide$p_id ~ "d1",
p_id %in% d2_lung_wide_iarc$p_id ~ "d2",
TRUE ~ "none"))
tmp_mismatch_ids <-
tmp_pids %>%
filter(inclusion %in% c("d1", "d2")) %>%
pull(p_id)
#testthat::test_that("number of IDs is matching",
testthat::expect_equal(
nrow(d2_lung_wide_iarc),
nrow(d1_lung_wide) + nrow(tmp_pids[inclusion == "d2"]) - nrow(tmp_pids[inclusion == "d1"])
)
testthat::expect_equal(
nrow(d2_lung_wide_iarc),
nrow(tmp_pids[inclusion == "both"]) + nrow(tmp_pids[inclusion == "d2"])
)
d_mismatch <- wide_spc_methods %>%
tidytable::bind_rows(wide_spc_methods_iarc) %>%
tidytable::filter(p_id %in% tmp_mismatch_ids) %>%
tidytable::arrange(p_id) %>%
select(p_id, reg.1, p_region.1, p_sex.1, t_siteicdo.1, t_datediag.1, p_spc.1, p_status.1,
t_hist.1, t_primiarc.1, p_futimeyrs.1, t_srvtime.1, t_siteicdo.2, t_datediag.2,
p_status.2, t_hist.2, t_srvtime.1, p_datebirth.1, p_datedeath.1, p_alive.1, p_dead.1,
SEQ_NUM.1, everything())
tmp_pids %>%
count(inclusion)#number of regions per country
d1_lung_wide %>%
distinct(p_region.1, reg.1) %>%
count(reg.1)#covered population per country
pop_methods_sum_byregion %>%
tidytable::summarize(pop = sum(population_n_per_year), .by = reg)testthat::test_that(
"Covered population refers to the same regions as included in individual data",
testthat::expect_equal(
d1_lung_wide %>% count(p_region.1) %>% filter(n > 1) %>% pull(p_region.1) %>% sort,
pop_methods_sum_byregion %>% pull(region) %>% sort
)
)Test passed 🎊
res_n_lc <- d1_lung_wide %>%
count(t_siteicdocat.1, reg.1) %>%
rename(n_d1 = n) %>%
bind_cols({d2_lung_wide_iarc %>%
count(t_siteicdocat.1, reg.1) %>%
select(n_d2 = n)})
res_n_lcn_lc_seer <- d1_lung_wide %>% filter(reg.1 == "seer") %>% nrow()
n_lc_zfkd <- d1_lung_wide %>% filter(reg.1 == "zfkd") %>% nrow()We observe a higher number of cases for SEER registries in d2 data, because of the filter for minimal survival (we deleted some tumors that don’t fulfill the IARC/IACR MP rules, thus increasing time between 1st tumor and end of FU; therefore less cases are excluded for minimum FU of 0.5 years)
n_d1_step0_ger <- wide_spc_methods %>%
filter(reg.1 == "zfkd") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
nrow()
n_d1_step1_ger <- wide_spc_methods %>%
filter(reg.1 == "zfkd") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>%
nrow()
n_d1_step2_ger <- wide_spc_methods %>%
filter(reg.1 == "zfkd") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
nrow()
n_d1_step3_ger <- wide_spc_methods %>%
filter(reg.1 == "zfkd") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
nrow()
n_d1_step4_ger <- wide_spc_methods %>%
filter(reg.1 == "zfkd") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
nrow()
n_d1_step5_ger <- wide_spc_methods %>%
filter(reg.1 == "zfkd") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
# S5: delete DCO at first LC
filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
nrow()
n_d1_step6_ger <- wide_spc_methods %>%
filter(reg.1 == "zfkd") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
# S5: delete DCO at first LC
filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
# S6: minimum follow-up without SPC, death or end of FU >= 6 months (0.5 years) after first lung cancer diagnosis
filter(p_futimeyrs.1 >= 0.5) %>%
nrow()
n_d1_step7_ger <- wide_spc_methods %>%
filter(reg.1 == "zfkd") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
# S5: delete DCO at first LC
filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
# S6: minimum follow-up without SPC, death or end of FU >= 6 months (0.5 years) after first lung cancer diagnosis
filter(p_futimeyrs.1 >= 0.5) %>%
# S7: exclusion of unusual histology of SPLC
filter(!(t_sublungiarc.2 %in% c("Excluded", "Unusual")) &
!(t_sublung.2 %in% c("Excluded - unusual"))) %>%
nrow()n_d1_step0_us <- wide_spc_methods %>%
filter(reg.1 == "seer") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
nrow()
n_d1_step1_us <- wide_spc_methods %>%
filter(reg.1 == "seer") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>%
nrow()
n_d1_step2_us <- wide_spc_methods %>%
filter(reg.1 == "seer") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
nrow()
n_d1_step3_us <- wide_spc_methods %>%
filter(reg.1 == "seer") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
nrow()
n_d1_step4_us <- wide_spc_methods %>%
filter(reg.1 == "seer") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
nrow()
n_d1_step5_us <- wide_spc_methods %>%
filter(reg.1 == "seer") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
# S5: delete DCO at first LC
filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
nrow()
n_d1_step6_us <- wide_spc_methods %>%
filter(reg.1 == "seer") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
# S5: delete DCO at first LC
filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
# S6: minimum follow-up without SPC, death or end of FU >= 6 months (0.5 years) after first lung cancer diagnosis
filter(p_futimeyrs.1 >= 0.5) %>%
nrow()
n_d1_step7_us <- wide_spc_methods %>%
filter(reg.1 == "seer") %>%
#S0: only keep primary LC
filter(t_sitewhogen.1 == "Lung and Bronchus") %>%
# S1: only select diagnosis 2002-2013 (as for Eberl et al. 2022 -> also done by Barclay 2019; but had to exclude 2000, 2001, and 2014, because registration in ZfKD was incomplete)
filter((t_datediag.1 >= "2002-01-01" & t_datediag.1 <= "2013-12-31")) %>% # S2: filter for the following registries of the first tumor
# SEER: all registries that started before 2002 (SEER-18) without Louisiana (Hurricane Katrina impact)
# ZfKD reasonable FU of at least 5 years, GEKID recommended):
filter(p_region.1 %in%
c(#All SEER-9 Registries
"SEER Reg 01 - San Francisco-Oakland SMSA",
"SEER Reg 02 - Connecticut",
"SEER Reg 20 - Detroit (Metropolitan)",
"SEER Reg 21 - Hawaii",
"SEER Reg 22 - Iowa",
"SEER Reg 23 - New Mexico",
"SEER Reg 25 - Seattle (Puget Sound)",
"SEER Reg 26 - Utah",
"SEER Reg 27 - Atlanta (Metropolitan)",
#Rest of SEER-13 Registries
"SEER Reg 29 - Alaska Natives",
"SEER Reg 31 - San Jose-Monterey",
"SEER Reg 35 - Los Angeles",
"SEER Reg 37 - Rural Georgia",
#Rest of SEER-18 Registries without Louisiana
"SEER Reg 41 - California excluding SF/SJM/LA",
"SEER Reg 42 - Kentucky",
"SEER Reg 44 - New Jersey",
"SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia",
#ZfKD with more than 20 years FU
"DE2 Bavaria",
"DE4 Brandenburg",
"DE5 Bremen",
"DE6 Hamburg",
"DE8 Mecklenburg-Western Pomerania",
"DE9 Lower Saxony",
"DEA3 Muenster",
"DEC Saarland",
"DED Saxony",
"DEF Schleswig-Holstein",
"DEG Thuringia"
)) %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
filter(t_agediag.1 >= 30 & t_agediag.1 < 100) %>%
# S4: exclusion of unusual histology of first LC
filter(!(t_sublungiarc.1 %in% c("Excluded", "Unusual")) &
!(t_sublung.1 %in% c("Excluded - unusual"))) %>%
# S5: delete DCO at first LC
filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
# S6: minimum follow-up without SPC, death or end of FU >= 6 months (0.5 years) after first lung cancer diagnosis
filter(p_futimeyrs.1 >= 0.5) %>%
# S7: exclusion of unusual histology of SPLC
filter(!(t_sublungiarc.2 %in% c("Excluded", "Unusual")) &
!(t_sublung.2 %in% c("Excluded - unusual"))) %>%
nrow()res_n_splc <- d1_lung_wide %>%
filter(t_lung.2 == 1) %>%
count(t_siteicdocat.1, reg.1) %>%
rename(n_splc_d1 = n) %>%
bind_cols({d2_lung_wide_iarc %>%
filter(t_lung.2 == 1) %>%
count(t_siteicdocat.1, reg.1) %>%
select(n_splc_d2 = n)})
res_n_splcres_n_splc_seer <- d1_lung_wide %>% filter(t_sitewhogen.2 %in% c("Lung and Bronchus") & reg.1 == "seer") %>% nrow()
res_n_splc_zfkd <- d1_lung_wide %>% filter(t_sitewhogen.2 %in% c("Lung and Bronchus") & reg.1 == "zfkd") %>% nrow()We observe that in our data in the U.S. (6877 of 263822 LC survivors [2.6066818 %])) and in Germany (542 of 135572 LC survivors [0.3997876 %])) have developed an SPLC.
#create wrapper function that calculates aggregated SIR by LC subtype
calc_count_byminfu <- function(min_futime, wide_df, by_vars = c(reg.1, p_sex.1)){
#2: calculate results
wide_df %>%
# filter according to FU stratum dataset; take t.srvtime instead of p_futimeyrs,
# because we want to keep all patients independent of SPC development
# this variable is measured in months and therefore needs to be transformed
tidytable::filter((t_srvtime.1 /12) >= min_futime & t_lung.1 == 1) %>%
#count all SPC in dataset
tidytable::summarise(
n_splc = sum(t_lung.2),
n_lc = sum(t_lung.1),
f_splc_perc = round((sum(t_lung.2) / n()) * 100, 2),
.by = {{by_vars}}) %>%
#add column with histology of index LC
mutate(min_fu = min_futime, .before = n_splc)
}d1_lung_wide %>%
select(reg.1, p_sex.1, t_sublung.1, t_sublungiarc.1, t_sublungiarcgroup.1, t_histgroupiarc.1,
t_sublung.2, t_sublungiarc.2, t_sublungiarcgroup.2, t_histgroupiarc.2) %>%
gtsummary::tbl_strata(
strata = reg.1,
~ .x %>%
gtsummary::tbl_summary(by = p_sex.1) %>%
gtsummary::modify_header(gtsummary::all_stat_cols() ~ "**{level}**") %>%
gtsummary::add_n() %>%
gtsummary::add_overall()
)| Characteristic | seer | zfkd | ||||||
|---|---|---|---|---|---|---|---|---|
| N | Overall, N = 263,8221 | Male1 | Female1 | N | Overall, N = 135,5721 | Male1 | Female1 | |
| Histologic subtype of lung cancer | 263,822 | 135,572 | ||||||
| Small-cell carcinoma | 33,028 (13%) | 15,717 (12%) | 17,311 (13%) | 24,347 (18%) | 15,813 (17%) | 8,534 (20%) | ||
| Adenocarcinoma | 103,717 (39%) | 45,949 (35%) | 57,768 (43%) | 46,890 (35%) | 28,018 (30%) | 18,872 (44%) | ||
| Squamous cell carcinoma | 54,351 (21%) | 33,149 (25%) | 21,202 (16%) | 38,695 (29%) | 31,825 (34%) | 6,870 (16%) | ||
| Carcinoid | 5,039 (1.9%) | 1,550 (1.2%) | 3,489 (2.6%) | 2,046 (1.5%) | 753 (0.8%) | 1,293 (3.0%) | ||
| Other NSCLC | 49,599 (19%) | 25,427 (19%) | 24,172 (18%) | 15,312 (11%) | 10,445 (11%) | 4,867 (11%) | ||
| Unspecified lung | 17,756 (6.7%) | 8,451 (6.5%) | 9,305 (7.0%) | 8,079 (6.0%) | 5,444 (5.9%) | 2,635 (6.1%) | ||
| Excluded - sarcoma | 318 (0.1%) | 173 (0.1%) | 145 (0.1%) | 199 (0.1%) | 98 (0.1%) | 101 (0.2%) | ||
| Excluded - unusual | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Excluded - benign | 14 (<0.1%) | 5 (<0.1%) | 9 (<0.1%) | 4 (<0.1%) | 1 (<0.1%) | 3 (<0.1%) | ||
| Histologic subtype of lung cancer IARC groups | 263,822 | 135,572 | ||||||
| Squamous cell carcinoma | 54,494 (21%) | 33,214 (25%) | 21,280 (16%) | 38,744 (29%) | 31,858 (34%) | 6,886 (16%) | ||
| Adenocarcinoma | 106,583 (40%) | 47,065 (36%) | 59,518 (45%) | 47,953 (35%) | 28,626 (31%) | 19,327 (45%) | ||
| Small cell carcinoma | 33,024 (13%) | 15,717 (12%) | 17,307 (13%) | 24,325 (18%) | 15,795 (17%) | 8,530 (20%) | ||
| Large cell carcinoma | 15,010 (5.7%) | 7,667 (5.9%) | 7,343 (5.5%) | 8,268 (6.1%) | 5,652 (6.1%) | 2,616 (6.1%) | ||
| Other specified carcinoma (incl Carcinoid) | 45,980 (17%) | 22,741 (17%) | 23,239 (17%) | 12,900 (9.5%) | 8,218 (8.9%) | 4,682 (11%) | ||
| Sarcoma | 434 (0.2%) | 241 (0.2%) | 193 (0.1%) | 265 (0.2%) | 149 (0.2%) | 116 (0.3%) | ||
| Other specified malignant neoplasm | 23 (<0.1%) | 6 (<0.1%) | 17 (<0.1%) | 22 (<0.1%) | 11 (<0.1%) | 11 (<0.1%) | ||
| Unspecified | 8,274 (3.1%) | 3,770 (2.9%) | 4,504 (3.4%) | 3,095 (2.3%) | 2,088 (2.3%) | 1,007 (2.3%) | ||
| Excluded | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Unusual | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Histologic subtype of lung cancer IARC groups (grouped) | 263,822 | 135,572 | ||||||
| Squamous cell carcinoma (SCC) | 54,494 (21%) | 33,214 (25%) | 21,280 (16%) | 38,744 (29%) | 31,858 (34%) | 6,886 (16%) | ||
| Adenocarcinoma (AC) | 106,583 (40%) | 47,065 (36%) | 59,518 (45%) | 47,953 (35%) | 28,626 (31%) | 19,327 (45%) | ||
| Small cell carcinoma (SCLC) | 33,024 (13%) | 15,717 (12%) | 17,307 (13%) | 24,325 (18%) | 15,795 (17%) | 8,530 (20%) | ||
| Large cell carcinoma (LCC) | 15,010 (5.7%) | 7,667 (5.9%) | 7,343 (5.5%) | 8,268 (6.1%) | 5,652 (6.1%) | 2,616 (6.1%) | ||
| Other & unspecified (O&U) | 54,711 (21%) | 26,758 (21%) | 27,953 (21%) | 16,282 (12%) | 10,466 (11%) | 5,816 (13%) | ||
| Excluded | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Unusual | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| IARC Histology groups (Morphology ICD-O-3 based recoding IARC 'histologically different' groups) | 263,822 | 135,572 | ||||||
| Squamous carcinomas | 54,459 (21%) | 33,211 (25%) | 21,248 (16%) | 38,804 (29%) | 31,901 (35%) | 6,903 (16%) | ||
| Basal cell carcinomas | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Adenocarcinomas | 94,685 (36%) | 42,794 (33%) | 51,891 (39%) | 44,118 (33%) | 26,483 (29%) | 17,635 (41%) | ||
| Other specific carcinomas | 89,657 (34%) | 42,060 (32%) | 47,597 (36%) | 40,484 (30%) | 25,755 (28%) | 14,729 (34%) | ||
| Unspecified carcinomas (NOS) | 16,290 (6.2%) | 8,339 (6.4%) | 7,951 (6.0%) | 8,784 (6.5%) | 6,010 (6.5%) | 2,774 (6.4%) | ||
| Sarcomas and soft tissue tumours | 317 (0.1%) | 173 (0.1%) | 144 (0.1%) | 199 (0.1%) | 98 (0.1%) | 101 (0.2%) | ||
| Mesothelioma | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Myeloid | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| B-cell neoplasms | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| T-cell and NK-cell neoplasms | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Hodgkin lymphoma | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Mast-cell Tumours | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Histiocytes and Accessory Lymphoid cells | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Unspecified haematopoietic cancers | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Kaposi sarcoma | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Other specified types of cancer | 140 (<0.1%) | 74 (<0.1%) | 66 (<0.1%) | 88 (<0.1%) | 62 (<0.1%) | 26 (<0.1%) | ||
| Unspecified types of cancer | 8,274 (3.1%) | 3,770 (2.9%) | 4,504 (3.4%) | 3,095 (2.3%) | 2,088 (2.3%) | 1,007 (2.3%) | ||
| t_sublung.2 | 6,905 | 543 | ||||||
| Adenocarcinoma | 3,009 (44%) | 1,206 (39%) | 1,803 (48%) | 204 (38%) | 142 (37%) | 62 (40%) | ||
| Carcinoid | 54 (0.8%) | 13 (0.4%) | 41 (1.1%) | 9 (1.7%) | 4 (1.0%) | 5 (3.2%) | ||
| Excluded - sarcoma | 5 (<0.1%) | 2 (<0.1%) | 3 (<0.1%) | 1 (0.2%) | 1 (0.3%) | 0 (0%) | ||
| Other NSCLC | 1,146 (17%) | 523 (17%) | 623 (16%) | 44 (8.1%) | 34 (8.8%) | 10 (6.5%) | ||
| Small-cell carcinoma | 593 (8.6%) | 256 (8.2%) | 337 (8.9%) | 125 (23%) | 87 (22%) | 38 (25%) | ||
| Squamous cell carcinoma | 1,631 (24%) | 899 (29%) | 732 (19%) | 160 (29%) | 120 (31%) | 40 (26%) | ||
| Unspecified lung | 467 (6.8%) | 216 (6.9%) | 251 (6.6%) | |||||
| Unknown | 256,917 | 127,306 | 129,611 | 135,029 | 92,009 | 43,020 | ||
| t_sublungiarc.2 | 6,905 | 543 | ||||||
| Adenocarcinoma | 3,185 (46%) | 1,272 (41%) | 1,913 (50%) | 213 (39%) | 147 (38%) | 66 (43%) | ||
| Large cell carcinoma | 319 (4.6%) | 147 (4.7%) | 172 (4.5%) | 1 (0.2%) | 1 (0.3%) | 0 (0%) | ||
| Other specified carcinoma (incl Carcinoid) | 945 (14%) | 431 (14%) | 514 (14%) | 43 (7.9%) | 32 (8.2%) | 11 (7.1%) | ||
| Other specified malignant neoplasm | 1 (<0.1%) | 0 (0%) | 1 (<0.1%) | |||||
| Sarcoma | 9 (0.1%) | 5 (0.2%) | 4 (0.1%) | 1 (0.2%) | 1 (0.3%) | 0 (0%) | ||
| Small cell carcinoma | 593 (8.6%) | 256 (8.2%) | 337 (8.9%) | 125 (23%) | 87 (22%) | 38 (25%) | ||
| Squamous cell carcinoma | 1,633 (24%) | 899 (29%) | 734 (19%) | 160 (29%) | 120 (31%) | 40 (26%) | ||
| Unspecified | 220 (3.2%) | 105 (3.4%) | 115 (3.0%) | |||||
| Unknown | 256,917 | 127,306 | 129,611 | 135,029 | 92,009 | 43,020 | ||
| t_sublungiarcgroup.2 | 6,905 | 543 | ||||||
| Adenocarcinoma (AC) | 3,185 (46%) | 1,272 (41%) | 1,913 (50%) | 213 (39%) | 147 (38%) | 66 (43%) | ||
| Large cell carcinoma (LCC) | 319 (4.6%) | 147 (4.7%) | 172 (4.5%) | 1 (0.2%) | 1 (0.3%) | 0 (0%) | ||
| Other & unspecified (O&U) | 1,175 (17%) | 541 (17%) | 634 (17%) | 44 (8.1%) | 33 (8.5%) | 11 (7.1%) | ||
| Small cell carcinoma (SCLC) | 593 (8.6%) | 256 (8.2%) | 337 (8.9%) | 125 (23%) | 87 (22%) | 38 (25%) | ||
| Squamous cell carcinoma (SCC) | 1,633 (24%) | 899 (29%) | 734 (19%) | 160 (29%) | 120 (31%) | 40 (26%) | ||
| Unknown | 256,917 | 127,306 | 129,611 | 135,029 | 92,009 | 43,020 | ||
| t_histgroupiarc.2 | 15,901 | 5,295 | ||||||
| Adenocarcinomas | 7,134 (45%) | 3,304 (42%) | 3,830 (48%) | 2,221 (42%) | 1,555 (41%) | 666 (45%) | ||
| B-cell neoplasms | 464 (2.9%) | 248 (3.1%) | 216 (2.7%) | 222 (4.2%) | 156 (4.1%) | 66 (4.4%) | ||
| Basal cell carcinomas | 6 (<0.1%) | 2 (<0.1%) | 4 (<0.1%) | |||||
| Histiocytes and Accessory Lymphoid cells | 1 (<0.1%) | 0 (0%) | 1 (<0.1%) | |||||
| Hodgkin lymphoma | 15 (<0.1%) | 11 (0.1%) | 4 (<0.1%) | 23 (0.4%) | 18 (0.5%) | 5 (0.3%) | ||
| Kaposi sarcoma | 3 (<0.1%) | 3 (<0.1%) | 0 (0%) | |||||
| Mesothelioma | 27 (0.2%) | 17 (0.2%) | 10 (0.1%) | 30 (0.6%) | 26 (0.7%) | 4 (0.3%) | ||
| Myeloid | 440 (2.8%) | 224 (2.8%) | 216 (2.7%) | 84 (1.6%) | 62 (1.6%) | 22 (1.5%) | ||
| Other specific carcinomas | 2,436 (15%) | 1,084 (14%) | 1,352 (17%) | 349 (6.6%) | 253 (6.7%) | 96 (6.4%) | ||
| Other specified types of cancer | 487 (3.1%) | 262 (3.3%) | 225 (2.8%) | 197 (3.7%) | 137 (3.6%) | 60 (4.0%) | ||
| Sarcomas and soft tissue tumours | 103 (0.6%) | 48 (0.6%) | 55 (0.7%) | 57 (1.1%) | 32 (0.8%) | 25 (1.7%) | ||
| Squamous carcinomas | 3,371 (21%) | 2,040 (26%) | 1,331 (17%) | 1,123 (21%) | 927 (24%) | 196 (13%) | ||
| T-cell and NK-cell neoplasms | 33 (0.2%) | 14 (0.2%) | 19 (0.2%) | 15 (0.3%) | 10 (0.3%) | 5 (0.3%) | ||
| Unspecified carcinomas (NOS) | 645 (4.1%) | 310 (3.9%) | 335 (4.2%) | 615 (12%) | 393 (10%) | 222 (15%) | ||
| Unspecified haematopoietic cancers | 237 (1.5%) | 130 (1.6%) | 107 (1.3%) | 64 (1.2%) | 43 (1.1%) | 21 (1.4%) | ||
| Unspecified types of cancer | 499 (3.1%) | 241 (3.0%) | 258 (3.2%) | 295 (5.6%) | 192 (5.0%) | 103 (6.9%) | ||
| Unknown | 247,921 | 122,483 | 125,438 | 130,277 | 88,593 | 41,684 | ||
| 1 n (%) | ||||||||
d1_lung_wide %>%
select(reg.1, p_sex.1, t_sublungiarcgroup.1, t_sublungiarcgroup.2) %>%
gtsummary::tbl_strata(
strata = reg.1,
~ .x %>%
gtsummary::tbl_summary(by = p_sex.1,
missing_text = "not applicable (no SPLC developed)") %>%
gtsummary::modify_header(gtsummary::all_stat_cols() ~ "**{level}**") %>%
gtsummary::add_n() %>%
gtsummary::add_overall()
) %>%
# remove empty categories of t_sublung
gtsummary::modify_table_body(
~ .x %>%
dplyr::filter(!(variable %in% c("t_sublungiarcgroup.1", "t_sublungiarcgroup.2") &
label %in% c("Excluded", "Unusual")))
)| Characteristic | seer | zfkd | ||||||
|---|---|---|---|---|---|---|---|---|
| N | Overall, N = 263,8221 | Male1 | Female1 | N | Overall, N = 135,5721 | Male1 | Female1 | |
| Histologic subtype of lung cancer IARC groups (grouped) | 263,822 | 135,572 | ||||||
| Squamous cell carcinoma (SCC) | 54,494 (21%) | 33,214 (25%) | 21,280 (16%) | 38,744 (29%) | 31,858 (34%) | 6,886 (16%) | ||
| Adenocarcinoma (AC) | 106,583 (40%) | 47,065 (36%) | 59,518 (45%) | 47,953 (35%) | 28,626 (31%) | 19,327 (45%) | ||
| Small cell carcinoma (SCLC) | 33,024 (13%) | 15,717 (12%) | 17,307 (13%) | 24,325 (18%) | 15,795 (17%) | 8,530 (20%) | ||
| Large cell carcinoma (LCC) | 15,010 (5.7%) | 7,667 (5.9%) | 7,343 (5.5%) | 8,268 (6.1%) | 5,652 (6.1%) | 2,616 (6.1%) | ||
| Other & unspecified (O&U) | 54,711 (21%) | 26,758 (21%) | 27,953 (21%) | 16,282 (12%) | 10,466 (11%) | 5,816 (13%) | ||
| t_sublungiarcgroup.2 | 6,905 | 543 | ||||||
| Adenocarcinoma (AC) | 3,185 (46%) | 1,272 (41%) | 1,913 (50%) | 213 (39%) | 147 (38%) | 66 (43%) | ||
| Large cell carcinoma (LCC) | 319 (4.6%) | 147 (4.7%) | 172 (4.5%) | 1 (0.2%) | 1 (0.3%) | 0 (0%) | ||
| Other & unspecified (O&U) | 1,175 (17%) | 541 (17%) | 634 (17%) | 44 (8.1%) | 33 (8.5%) | 11 (7.1%) | ||
| Small cell carcinoma (SCLC) | 593 (8.6%) | 256 (8.2%) | 337 (8.9%) | 125 (23%) | 87 (22%) | 38 (25%) | ||
| Squamous cell carcinoma (SCC) | 1,633 (24%) | 899 (29%) | 734 (19%) | 160 (29%) | 120 (31%) | 40 (26%) | ||
| not applicable (no SPLC developed) | 256,917 | 127,306 | 129,611 | 135,029 | 92,009 | 43,020 | ||
| 1 n (%) | ||||||||
#WIP: Present table with percentage per region and n (as gt)
res_same_hist_histgroupiarc <- d1_lung_wide %>%
filter(t_sitewhogen.2 %in% c("Lung and Bronchus")) %>%
mutate(same_hist = case_when(
p_spc == "No SPC" & !is.na(t_histgroupiarc.2) ~ "Error - t_sublung provided without SPC",
p_spc == "SPC developed" & (t_histgroupiarc.1 == t_histgroupiarc.2) ~ "same type",
p_spc == "SPC developed" & is.na(t_histgroupiarc.1) ~ "t_sublung.1 missing",
p_spc == "SPC developed" & t_sitewhogen.2 == "Lung and Bronchus" & is.na(t_histgroupiarc.2) ~ "no information t_sublung.2",
p_spc == "SPC developed" & (t_histgroupiarc.1 != t_histgroupiarc.2) ~ "different type",
TRUE ~ NA_character_))
res_same_hist_histgroupiarc %>%
janitor::tabyl(reg.1, same_hist) %>%
janitor::adorn_totals() %>%
janitor::adorn_percentages("row") %>%
janitor::adorn_pct_formatting(digits = 1)res_same_hist_histgroupiarc %>%
janitor::tabyl(p_region.1, same_hist) %>%
janitor::adorn_totals() %>%
janitor::adorn_percentages("row") %>%
janitor::adorn_pct_formatting(digits = 1)unusual_hist <- d0_lung_wide_raw %>% filter(t_sublungiarc.2 %in% c("Excluded", "Unusual") | t_sublung.2 %in% c("Excluded - unusual")) %>% distinct(t_hist.2) %>% pull() %>% as.character() %>% sort()#Germany - zfkd
tab_crude_ir_ger <- d1_lung_wide %>%
filter(reg.1 == "zfkd") %>%
mutate(count_spc = case_when(p_spc.1 == "SPC developed" ~ 1,
TRUE ~ 0)) %>%
msSPChelpR::ir_crosstab(dattype = NULL, count_var = "count_spc", xbreak_var = "none",
ybreak_vars = c("p_sex.1", "t_sublung.1"),
add_total = "top", collapse_ci = FALSE,
futime_var = "p_futimeyrs.1", alpha = 0.05) %>%
filter(yvar_name != "t_sublung.1") %>%
mutate(reg = "Germany (ZfKD)",
group = "SPC developed",
variable = "Crude incidence rate of SPC",
category = "IR [per 100,000 person-years] (95% CI)",
sex = yvar_label,
value = abs_ir,
lci = abs_ir_lci,
uci = abs_ir_uci) %>%
select(reg, category, sex, value, lci, uci)
#US - SEER
tab_crude_ir_us <- d1_lung_wide %>%
filter(reg.1 == "seer") %>%
mutate(count_spc = case_when(p_spc.1 == "SPC developed" ~ 1,
TRUE ~ 0)) %>%
msSPChelpR::ir_crosstab(dattype = NULL, count_var = "count_spc", xbreak_var = "none",
ybreak_vars = c("p_sex.1", "t_sublung.1"),
add_total = "top", collapse_ci = FALSE,
futime_var = "p_futimeyrs.1", alpha = 0.05) %>%
filter(yvar_name != "t_sublung.1") %>%
mutate(reg = "U.S. (SEER)",
group = "SPC developed",
variable = "Crude incidence rate of SPC",
category = "IR [per 100,000 person-years] (95% CI)",
sex = yvar_label,
value = abs_ir,
lci = abs_ir_lci,
uci = abs_ir_uci) %>%
select(reg, category, sex, value, lci, uci)
tab_crude_ir_gertab_crude_ir_us#prepare custom table 1
##calculate single parts (N FC, ASIR FC 2002-2013, FU time, Cases > 6months, Age, Period of Diagnosis, Mean FU time, PYAR SPC developed [N, %, Abs Inc Rate])
#e1 ASIR (usind d0 dataset - exclusion by time, region)
tab1_e1_asir_zfkd <- d0_lung_wide_raw %>%
filter(reg.1 == "zfkd" & t_lung.1 == 1) %>%
mutate(count_var = t_lung.1) %>%
msSPChelpR::asir(dattype = NULL,
std_pop = "WHO1960",
truncate_std_pop = FALSE,
futime_src = "refpop",
summarize_groups = c("region"),
count_var = "count_var",
stdpop_df = standard_population,
refpop_df = population,
region_var = "p_region.1",
age_var = "t_agegroupdiag.1",
sex_var = "p_sex.1",
year_var = "t_singleyeardiag.1",
site_var = "t_sitewhogen.1",
futime_var = "t_tmp",
pyar_var = NULL,
alpha = 0.05) %>%
filter(t_site == "Lung and Bronchus") %>%
select(sex, year, asir, asir_lci_gam, asir_uci_gam) %>%
mutate(group = "Observed cases of primary lung cancer (all independent of survival)",
variable = "Age-standardized incidence rate of lung cancer (World Standard Population 1960)",
category = paste0("ASIR in ", year, " [per 100,000] (95% CI)"),
reg = "zfkd",
value = round(asir, 1),
lci = round(asir_lci_gam, 1),
uci = round(asir_uci_gam, 1)) %>%
select(group, variable, category, reg, sex, value, lci, uci) Using person-years at risk [PYAR] from reference population as pyears for calculating incidence rates.
Be careful, in this calculation it is assumed that all included regions have collected data for the full time period: 2002 to 2013
If you have included registries with differing times, please check this assumption by looking at groups with 0 incidence and specify option 'inclusion_restrictions' if needed.
The following regions, age groups, years, sexes and ICD codes are considered: DE2 Bavaria, DE4 Brandenburg, DE5 Bremen, DE6 Hamburg, DE8 Mecklenburg-Western Pomerania, DE9 Lower Saxony, DEA3 Muenster, DEC Saarland, DED Saxony, DEF Schleswig-Holstein, DEG Thuringia 2005, 2007, 2013, 2002, 2006, 2008, 2009, 2010, 2012, 2004, 2011, 2003 15 - 19, 20 - 24, 25 - 29, 30 - 34, 35 - 39, 40 - 44, 45 - 49, 50 - 54, 55 - 59, 60 - 64, 65 - 69, 70 - 74, 75 - 79, 80 - 84, 85 - 120 Male, Female Lung and Bronchus
For the following age-groups there were no cases to be found in the dataset. Incidence and PYARs will be set to 0: 00 - 04, 05 - 09, 10 - 14
tab1_e1_asir_seer <- d0_lung_wide_raw %>%
filter(reg.1 == "seer" & t_lung.1 == 1) %>%
mutate(count_var = t_lung.1) %>%
msSPChelpR::asir(dattype = NULL,
std_pop = "WHO1960",
truncate_std_pop = FALSE,
futime_src = "refpop",
summarize_groups = c("region"),
count_var = "count_var",
stdpop_df = standard_population,
refpop_df = population_us,
region_var = "p_region.1",
age_var = "t_agegroupdiag.1",
sex_var = "p_sex.1",
year_var = "t_singleyeardiag.1",
site_var = "t_sitewhogen.1",
futime_var = "t_tmp",
pyar_var = NULL,
alpha = 0.05) %>%
filter(t_site == "Lung and Bronchus") %>%
select(sex, year, asir, asir_lci_gam, asir_uci_gam) %>%
mutate(group = "Observed cases of primary lung cancer (all independent of survival)",
variable = "Age-standardized incidence rate of lung cancer (World Standard Population 1960)",
category = paste0("ASIR in ", year, " [per 100,000] (95% CI)"),
reg = "seer",
value = round(asir, 1),
lci = round(asir_lci_gam, 1),
uci = round(asir_uci_gam, 1)) %>%
select(group, variable, category, reg, sex, value, lci, uci)Using person-years at risk [PYAR] from reference population as pyears for calculating incidence rates.
Be careful, in this calculation it is assumed that all included regions have collected data for the full time period: 2002 to 2013
If you have included registries with differing times, please check this assumption by looking at groups with 0 incidence and specify option 'inclusion_restrictions' if needed.
The following regions, age groups, years, sexes and ICD codes are considered: SEER Reg 01 - San Francisco-Oakland SMSA, SEER Reg 02 - Connecticut, SEER Reg 20 - Detroit (Metropolitan), SEER Reg 21 - Hawaii, SEER Reg 22 - Iowa, SEER Reg 23 - New Mexico, SEER Reg 25 - Seattle (Puget Sound), SEER Reg 26 - Utah, SEER Reg 27 - Atlanta (Metropolitan), SEER Reg 29 - Alaska Natives, SEER Reg 31 - San Jose-Monterey, SEER Reg 35 - Los Angeles, SEER Reg 37 - Rural Georgia, SEER Reg 41 - California excluding SF/SJM/LA, SEER Reg 42 - Kentucky, SEER Reg 44 - New Jersey, SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia 2012, 2007, 2003, 2004, 2006, 2013, 2008, 2009, 2011, 2005, 2010, 2002 00 - 04, 05 - 09, 10 - 14, 15 - 19, 20 - 24, 25 - 29, 30 - 34, 35 - 39, 40 - 44, 45 - 49, 50 - 54, 55 - 59, 60 - 64, 65 - 69, 70 - 74, 75 - 79, 80 - 84, 85 - 120 Female, Male Lung and Bronchus
tab1_e1 <- rbind(tab1_e1_asir_zfkd, tab1_e1_asir_seer)
rm(tab1_e1_asir_zfkd, tab1_e1_asir_seer)
#e2 Number of First Lung Cancers included
tab1_e2 <- d1_lung_wide %>%
count(reg = reg.1, sex = p_sex.1) %>%
mutate(freq = n / sum(n), .by = reg) %>%
mutate(group = "Cases of primary lung cancer (at least 6 months survival)",
variable = "Patients with primary LC",
category = "n (% of Total)") %>%
select(group, variable, category, reg, sex, n, freq)
#e3 Age
tab1_e3_agecat <- d1_lung_wide %>%
group_by(reg.1, p_sex.1) %>%
count(p_agefcgroup) %>%
mutate(freq = n / sum(n)) %>%
rename(category = p_agefcgroup) %>%
mutate(group = "Observed cases of primary lung cancer (at least 6 months survival)",
variable = "Age at diagnosis of LC",
category = forcats::fct_na_value_to_level(category, "Unknown")) %>%
select(group, variable, category, reg = reg.1, sex = p_sex.1, n, freq)
tab1_e3_medage <- d1_lung_wide %>%
summarize(age_median = median(t_agediag.1), .by = c(reg.1, p_sex.1)) %>%
mutate(group = "Observed cases of primary lung cancer (at least 6 months survival)",
variable = "Age at diagnosis of LC",
category = "Median age [years]",
value = round(age_median, 1)) %>%
select(group, variable, category, reg = reg.1, sex = p_sex.1, value)
tab1_e3_age <- tab1_e3_agecat %>%
bind_rows(tab1_e3_medage)
rm(tab1_e3_agecat, tab1_e3_medage)
#e4 Year of Diagnosis of FC
tab1_e4_year <- d1_lung_wide %>%
group_by(reg.1, p_sex.1) %>%
count(p_yearfcgroup) %>%
mutate(freq = n / sum(n)) %>%
rename(category = p_yearfcgroup) %>%
mutate(group = "Observed cases of primary lung cancer (at least 6 months survival)",
variable = "Year of diagnosis of LC",
category = forcats::fct_na_value_to_level(category, "Unknown")) %>%
select(group, variable, category, reg = reg.1, sex = p_sex.1, n, freq)
#e5 Subsite
tab1_e5_sub <- d1_lung_wide %>%
group_by(reg.1, p_sex.1) %>%
count(t_sublungiarcgroup.1) %>%
mutate(freq = n / sum(n)) %>%
rename(category = t_sublungiarcgroup.1) %>%
mutate(group = "Observed cases of primary lung cancer (at least 6 months survival)",
variable = "Subsite of LC",
category = forcats::fct_na_value_to_level(category, "Unknown")) %>%
select(group, variable, category, reg = reg.1, sex = p_sex.1, n, freq)
#e6 FU time (mean + PYAR sum)
tab1_e6_fusex <- d1_lung_wide %>%
summarize(fu_mean = mean(p_futimeyrs.1, na.rm = TRUE) * 12, .by = c(reg.1, p_sex.1)) %>%
mutate(group = "Observed cases of primary lung cancer (at least 6 months survival)",
variable = "Person-years at risk",
category = "Mean follow-up [months]",
value = round(fu_mean, 1)) %>%
select(group, variable, category, reg = reg.1, sex = p_sex.1, value)
tab1_e6_pyarsex <- d1_lung_wide %>%
summarize(fu_mean = sum(p_futimeyrs.1, na.rm = TRUE), .by = c(reg.1, p_sex.1)) %>%
mutate(group = "Observed cases of primary lung cancer (at least 6 months survival)",
variable = "Person-years at risk",
category = "Sum of PYAR",
value = round(fu_mean, 0)) %>%
select(group, variable, category, reg = reg.1, sex = p_sex.1, value)
tab1_e6 <- rbind(tab1_e6_fusex, tab1_e6_pyarsex)
rm(tab1_e6_fusex, tab1_e6_pyarsex)
#e7 Status (SPLC, SPC, dead, end of FU)
tab1_e7_stat <- d1_lung_wide %>%
group_by(reg.1, p_sex.1) %>%
count(p_statuseventlc) %>%
mutate(freq = n / sum(n)) %>%
rename(category = p_statuseventlc) %>%
mutate(group = "Observed cases of primary lung cancer (at least 6 months survival)",
variable = "Patient status",
category = forcats::fct_na_value_to_level(category, "Unknown")) %>%
select(group, variable, category, reg = reg.1, sex = p_sex.1, n, freq)
#e8_1 Incidence of SPLC
tab1_e8_zfkd <- d1_lung_wide %>%
filter(reg.1 == "zfkd") %>%
mutate(count_spc = case_when(t_lung.2 == 1 ~ 1,
.default = 0)) %>%
msSPChelpR::ir_crosstab(dattype = NULL, count_var = "count_spc", xbreak_var = "none",
ybreak_vars = c("p_sex.1", "t_sublung.1"),
add_total = "no", collapse_ci = FALSE, futime_var = "p_futimeyrs.1",
alpha = 0.05) %>%
filter(yvar_name != "t_sublung.1") %>%
mutate(group = "SPC developed",
variable = "Absolute incidence rate of SPC",
category = "SPLC IR [per 100,000 PYAR] (95% CI)",
reg = "zfkd",
sex = yvar_label,
value = abs_ir,
lci = abs_ir_lci,
uci = abs_ir_uci) %>%
select(group, variable, category, reg, sex, value, lci, uci)
tab1_e8_seer <- d1_lung_wide %>%
filter(reg.1 == "seer") %>%
mutate(count_spc = case_when(t_lung.2 == 1 ~ 1,
.default = 0)) %>%
msSPChelpR::ir_crosstab(dattype = NULL, count_var = "count_spc", xbreak_var = "none",
ybreak_vars = c("p_sex.1", "t_sublung.1"),
add_total = "no", collapse_ci = FALSE, futime_var = "p_futimeyrs.1",
alpha = 0.05) %>%
filter(yvar_name != "t_sublung.1") %>%
mutate(group = "SPC developed",
variable = "Absolute incidence rate of SPC",
category = "SPLC IR [per 100,000 PYAR] (95% CI)",
reg = "seer",
sex = yvar_label,
value = abs_ir,
lci = abs_ir_lci,
uci = abs_ir_uci) %>%
select(group, variable, category, reg, sex, value, lci, uci)
tab1_e8 <- rbind(tab1_e8_zfkd, tab1_e8_seer)
#e8_2 Incidence of other SPC
tab1_e8_2_zfkd <- d1_lung_wide %>%
filter(reg.1 == "zfkd") %>%
mutate(count_spc = case_when(p_spc == "SPC developed" & t_lung.2 == 0 ~ 1,
.default = 0)) %>%
msSPChelpR::ir_crosstab(dattype = NULL, count_var = "count_spc", xbreak_var = "none",
ybreak_vars = c("p_sex.1", "t_sublung.1"),
add_total = "no", collapse_ci = FALSE, futime_var = "p_futimeyrs.1",
alpha = 0.05) %>%
filter(yvar_name != "t_sublung.1") %>%
mutate(group = "SPC developed",
variable = "Absolute incidence rate of SPC",
category = "Other SPC IR [per 100,000 PYAR] (95% CI)",
reg = "zfkd",
sex = yvar_label,
value = abs_ir,
lci = abs_ir_lci,
uci = abs_ir_uci) %>%
select(group, variable, category, reg, sex, value, lci, uci)
tab1_e8_2_seer <- d1_lung_wide %>%
filter(reg.1 == "seer") %>%
mutate(count_spc = case_when(p_spc == "SPC developed" & t_lung.2 == 0 ~ 1,
.default = 0)) %>%
msSPChelpR::ir_crosstab(dattype = NULL, count_var = "count_spc", xbreak_var = "none",
ybreak_vars = c("p_sex.1", "t_sublung.1"),
add_total = "no", collapse_ci = FALSE, futime_var = "p_futimeyrs.1",
alpha = 0.05) %>%
filter(yvar_name != "t_sublung.1") %>%
mutate(group = "SPC developed",
variable = "Absolute incidence rate of SPC",
category = "Other SPC IR [per 100,000 PYAR] (95% CI)",
reg = "seer",
sex = yvar_label,
value = abs_ir,
lci = abs_ir_lci,
uci = abs_ir_uci) %>%
select(group, variable, category, reg, sex, value, lci, uci)
tab1_e8 <- rbind(tab1_e8_zfkd, tab1_e8_2_zfkd, tab1_e8_seer, tab1_e8_2_seer) %>%
arrange(desc(reg), sex)
rm(tab1_e8_zfkd, tab1_e8_2_zfkd, tab1_e8_seer, tab1_e8_2_seer)
##put single parts together and reshape
tab1_l <- bind_rows(tab1_e1, tab1_e2, tab1_e3_age, tab1_e4_year, tab1_e5_sub, tab1_e6, tab1_e7_stat, tab1_e8)
rm(tab1_e1, tab1_e2, tab1_e3_age, tab1_e4_year, tab1_e5_sub, tab1_e6, tab1_e7_stat, tab1_e8)
tab1 <- tab1_l %>%
pivot_wider(names_from = c(reg, sex),
values_from = tidyselect::all_of(c("n", "freq", "value", "lci", "uci")),
names_sep = "_") %>%
#add row for ASIR plot
add_row(group = "Observed cases of primary lung cancer (all independent of survival)",
variable = "Age-standardized incidence rate of lung cancer (World Standard Population 1960)",
category = "ASIR 2002 - 2013",
.before = 2)plot_asir_tab1 <- function(data = tab1, sex, reg, output_dir = output_dir_tables){
color_ref <- paste(reg, sex)
value_var_name <- paste0("value_", reg, "_", sex)
chart <- data %>%
filter(variable == "Age-standardized incidence rate of lung cancer (World Standard Population 1960)" & category != "ASIR 2002 - 2013") %>%
mutate(year = as.numeric(str_remove_all(category, paste(c("ASIR in ", " \\[per 100,000\\] \\(95\\% CI\\)"), collapse = "|")))) %>%
select(year, ASIR = any_of(value_var_name)) %>%
ggplot() +
geom_line(aes(x=year, y=ASIR),
linewidth = 2,
color = colors_2_sex_reg[color_ref]) +
coord_cartesian(xlim= c(2002, 2013), ylim = c(0, 45)) +
theme_minimal() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_text(size=25),
legend.position="none")
ggsave(
glue::glue("{output_dir}/asir_{reg}_{sex}.png"),
chart,
width = 12,
height = 7,
units = "cm"
)
}
plot_asir_tab1(sex = "Male", reg = "zfkd")
plot_asir_tab1(sex = "Female", reg = "zfkd")
plot_asir_tab1(sex = "Male", reg = "seer")
plot_asir_tab1(sex = "Female", reg = "seer")#take raw data but remove by age_group and delete DCO at LC
d_share_sublung <- d0_lung_wide_raw %>%
# S3: filter for patients with LC diagnosis at age 30 to 99 years.
tidylog::filter((t_agediag.1 >= 30 & t_agediag.1 < 100)) %>%
# S5: delete DCO at first LC
tidylog::filter(t_confirm.1 != "DCO" | is.na(t_confirm.1)) %>%
tidytable::select(p_id, reg.1, p_region.1, p_sex.1, t_histgroupiarc.1, t_sublung.1, t_sublungiarc.1)filter: removed 1,239 rows (<1%), 747,035 rows remaining
filter: removed 40,690 rows (5%), 706,345 rows remaining
d_share_sublung %>%
select(-p_id, -p_region.1) %>%
gtsummary::tbl_strata(
strata = reg.1,
~ .x %>%
gtsummary::tbl_summary(by = p_sex.1) %>%
gtsummary::modify_header(gtsummary::all_stat_cols() ~ "**{level}**") %>%
gtsummary::add_n() %>%
gtsummary::add_overall()
)| Characteristic | seer | zfkd | ||||||
|---|---|---|---|---|---|---|---|---|
| N | Overall, N = 481,1481 | Male1 | Female1 | N | Overall, N = 225,1971 | Male1 | Female1 | |
| IARC Histology groups (Morphology ICD-O-3 based recoding IARC 'histologically different' groups) | 481,148 | 225,197 | ||||||
| Squamous carcinomas | 88,980 (18%) | 55,537 (22%) | 33,443 (15%) | 59,285 (26%) | 48,797 (31%) | 10,488 (15%) | ||
| Basal cell carcinomas | 4 (<0.1%) | 2 (<0.1%) | 2 (<0.1%) | 2 (<0.1%) | 1 (<0.1%) | 1 (<0.1%) | ||
| Adenocarcinomas | 154,818 (32%) | 75,022 (30%) | 79,796 (35%) | 69,994 (31%) | 44,113 (28%) | 25,881 (38%) | ||
| Other specific carcinomas | 158,549 (33%) | 80,045 (32%) | 78,504 (34%) | 65,533 (29%) | 43,475 (28%) | 22,058 (33%) | ||
| Unspecified carcinomas (NOS) | 42,808 (8.9%) | 22,713 (9.0%) | 20,095 (8.7%) | 20,152 (8.9%) | 14,022 (8.9%) | 6,130 (9.1%) | ||
| Sarcomas and soft tissue tumours | 761 (0.2%) | 424 (0.2%) | 337 (0.1%) | 402 (0.2%) | 229 (0.1%) | 173 (0.3%) | ||
| Mesothelioma | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Myeloid | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| B-cell neoplasms | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| T-cell and NK-cell neoplasms | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Hodgkin lymphoma | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Mast-cell Tumours | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Histiocytes and Accessory Lymphoid cells | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Unspecified haematopoietic cancers | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Kaposi sarcoma | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Other specified types of cancer | 319 (<0.1%) | 185 (<0.1%) | 134 (<0.1%) | 176 (<0.1%) | 122 (<0.1%) | 54 (<0.1%) | ||
| Unspecified types of cancer | 34,909 (7.3%) | 17,167 (6.8%) | 17,742 (7.7%) | 9,653 (4.3%) | 6,730 (4.3%) | 2,923 (4.3%) | ||
| Histologic subtype of lung cancer | 481,148 | 225,197 | ||||||
| Small-cell carcinoma | 61,674 (13%) | 30,633 (12%) | 31,041 (13%) | 40,509 (18%) | 27,179 (17%) | 13,330 (20%) | ||
| Adenocarcinoma | 166,124 (35%) | 79,261 (32%) | 86,863 (38%) | 73,558 (33%) | 46,161 (29%) | 27,397 (40%) | ||
| Squamous cell carcinoma | 88,837 (18%) | 55,455 (22%) | 33,382 (15%) | 59,134 (26%) | 48,687 (31%) | 10,447 (15%) | ||
| Carcinoid | 5,455 (1.1%) | 1,721 (0.7%) | 3,734 (1.6%) | 2,253 (1.0%) | 862 (0.5%) | 1,391 (2.1%) | ||
| Other NSCLC | 92,950 (19%) | 50,706 (20%) | 42,244 (18%) | 26,212 (12%) | 18,426 (12%) | 7,786 (11%) | ||
| Unspecified lung | 65,188 (14%) | 32,815 (13%) | 32,373 (14%) | 23,005 (10%) | 15,875 (10%) | 7,130 (11%) | ||
| Excluded - sarcoma | 638 (0.1%) | 372 (0.1%) | 266 (0.1%) | 373 (0.2%) | 222 (0.1%) | 151 (0.2%) | ||
| Excluded - unusual | 268 (<0.1%) | 127 (<0.1%) | 141 (<0.1%) | 149 (<0.1%) | 76 (<0.1%) | 73 (0.1%) | ||
| Excluded - benign | 14 (<0.1%) | 5 (<0.1%) | 9 (<0.1%) | 4 (<0.1%) | 1 (<0.1%) | 3 (<0.1%) | ||
| Histologic subtype of lung cancer IARC groups | 481,148 | 225,197 | ||||||
| Squamous cell carcinoma | 89,064 (19%) | 55,562 (22%) | 33,502 (15%) | 59,205 (26%) | 48,736 (31%) | 10,469 (15%) | ||
| Adenocarcinoma | 169,335 (35%) | 80,532 (32%) | 88,803 (39%) | 74,848 (33%) | 46,916 (30%) | 27,932 (41%) | ||
| Small cell carcinoma | 61,661 (13%) | 30,630 (12%) | 31,031 (13%) | 40,460 (18%) | 27,142 (17%) | 13,318 (20%) | ||
| Large cell carcinoma | 40,851 (8.5%) | 21,652 (8.6%) | 19,199 (8.3%) | 19,366 (8.6%) | 13,450 (8.5%) | 5,916 (8.7%) | ||
| Other specified carcinoma (incl Carcinoid) | 84,239 (18%) | 44,938 (18%) | 39,301 (17%) | 21,065 (9.4%) | 14,151 (9.0%) | 6,914 (10%) | ||
| Sarcoma | 871 (0.2%) | 503 (0.2%) | 368 (0.2%) | 491 (0.2%) | 303 (0.2%) | 188 (0.3%) | ||
| Other specified malignant neoplasm | 193 (<0.1%) | 94 (<0.1%) | 99 (<0.1%) | 81 (<0.1%) | 40 (<0.1%) | 41 (<0.1%) | ||
| Unspecified | 34,909 (7.3%) | 17,167 (6.8%) | 17,742 (7.7%) | 9,653 (4.3%) | 6,730 (4.3%) | 2,923 (4.3%) | ||
| Excluded | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | ||
| Unusual | 25 (<0.1%) | 17 (<0.1%) | 8 (<0.1%) | 28 (<0.1%) | 21 (<0.1%) | 7 (<0.1%) | ||
| 1 n (%) | ||||||||
tab_perc_sublung1_zfkd <- d_share_sublung %>%
filter(reg.1 == "zfkd") %>%
select(-p_id, -reg.1) %>%
gtsummary::tbl_summary(.,
by = p_sex.1,
statistic = list(
gtsummary::all_categorical() ~ "{p}"
),
digits = gtsummary::all_categorical() ~ 2,
)
tab_perc_sublung1_zfkd| Characteristic | Male, N = 157,4891 | Female, N = 67,7081 |
|---|---|---|
| p_region.1 | ||
| DE2 Bavaria | 22.43 | 25.11 |
| DE4 Brandenburg | 7.58 | 6.26 |
| DE5 Bremen | 2.38 | 2.87 |
| DE6 Hamburg | 4.78 | 6.53 |
| DE8 Mecklenburg-Western Pomerania | 5.32 | 4.34 |
| DE9 Lower Saxony | 20.66 | 21.48 |
| DEA3 Muenster | 7.67 | 7.72 |
| DEC Saarland | 3.63 | 3.67 |
| DED Saxony | 11.79 | 8.37 |
| DEF Schleswig-Holstein | 7.73 | 9.11 |
| DEG Thuringia | 6.04 | 4.54 |
| IARC Histology groups (Morphology ICD-O-3 based recoding IARC 'histologically different' groups) | ||
| Squamous carcinomas | 30.98 | 15.49 |
| Basal cell carcinomas | 0.00 | 0.00 |
| Adenocarcinomas | 28.01 | 38.22 |
| Other specific carcinomas | 27.61 | 32.58 |
| Unspecified carcinomas (NOS) | 8.90 | 9.05 |
| Sarcomas and soft tissue tumours | 0.15 | 0.26 |
| Mesothelioma | 0.00 | 0.00 |
| Myeloid | 0.00 | 0.00 |
| B-cell neoplasms | 0.00 | 0.00 |
| T-cell and NK-cell neoplasms | 0.00 | 0.00 |
| Hodgkin lymphoma | 0.00 | 0.00 |
| Mast-cell Tumours | 0.00 | 0.00 |
| Histiocytes and Accessory Lymphoid cells | 0.00 | 0.00 |
| Unspecified haematopoietic cancers | 0.00 | 0.00 |
| Kaposi sarcoma | 0.00 | 0.00 |
| Other specified types of cancer | 0.08 | 0.08 |
| Unspecified types of cancer | 4.27 | 4.32 |
| Histologic subtype of lung cancer | ||
| Small-cell carcinoma | 17.26 | 19.69 |
| Adenocarcinoma | 29.31 | 40.46 |
| Squamous cell carcinoma | 30.91 | 15.43 |
| Carcinoid | 0.55 | 2.05 |
| Other NSCLC | 11.70 | 11.50 |
| Unspecified lung | 10.08 | 10.53 |
| Excluded - sarcoma | 0.14 | 0.22 |
| Excluded - unusual | 0.05 | 0.11 |
| Excluded - benign | 0.00 | 0.00 |
| Histologic subtype of lung cancer IARC groups | ||
| Squamous cell carcinoma | 30.95 | 15.46 |
| Adenocarcinoma | 29.79 | 41.25 |
| Small cell carcinoma | 17.23 | 19.67 |
| Large cell carcinoma | 8.54 | 8.74 |
| Other specified carcinoma (incl Carcinoid) | 8.99 | 10.21 |
| Sarcoma | 0.19 | 0.28 |
| Other specified malignant neoplasm | 0.03 | 0.06 |
| Unspecified | 4.27 | 4.32 |
| Excluded | 0.00 | 0.00 |
| Unusual | 0.01 | 0.01 |
| 1 % | ||
This simulation simply assumes that if SPLC had the same histology group distribution as the first LC then, we will observe:
SIR(sim_real1.0) = O / E = (1 * O) / (1 * E) #assuming that E and O are the same for SIR = 1, but there is a correction factor for combinations not possible x(not_possible) = E * x(not_possible) / E #x(not_possible is defined by the combinations of impossible histology group combinations, i.e. histA cannot follow histA, but only histB, C and so on) = (p_histA * (1-p_histA) + p_histB * (1-p_histB) +…) = (p_histA - p_histA^2 + p_histB - p_histB^2 + …) #given that sum of all p_histA,B,C equals 1 = 1 - sum(p_histA^2, p_histB^2, …)
d_share_sublung %>%
filter(reg.1 == "zfkd") %>%
select(p_sex.1, t_histgroupiarc.1) %>%
#create summary table above
gtsummary::tbl_summary(.,
by = p_sex.1,
statistic = list(
gtsummary::all_categorical() ~ "{p}"
),
digits = gtsummary::all_categorical() ~ 2,
) %>%
#extract data
gtsummary::as_tibble() %>%
rename(female = contains("Female"),
male = contains("Male", ignore.case = FALSE)) %>%
#calculate the share of same_hist cancers by squaring the columns
mutate(male_sq = (as.numeric(male)/100)^2,
female_sq = (as.numeric(female)/100)^2) %>%
summarize(sir_male_sim = 1 - sum(male_sq, na.rm = TRUE),
sir_female_sim = 1 - sum(female_sq, na.rm = TRUE))Strategy:
d_histfreq <- d_share_sublung %>%
filter(reg.1 == "zfkd") %>%
select(p_sex.1, t_histgroupiarc.1) %>%
#create summary table above
gtsummary::tbl_summary(.,
by = p_sex.1,
statistic = list(
gtsummary::all_categorical() ~ "{p}"
),
digits = gtsummary::all_categorical() ~ 3,
) %>%
#extract data
gtsummary::as_tibble() %>%
rename(t_histgroupiarc = contains("Characteristic"),
Female = contains("Female"),
Male = contains("Male", ignore.case = FALSE)
) %>%
filter(t_histgroupiarc != "IARC Histology groups (Morphology ICD-O-3 based recoding IARC 'histologically different' groups)") %>%
pivot_longer(!t_histgroupiarc, names_to = "sex", values_to = "freq") %>%
mutate(freq = as.numeric(freq) / 100,
reg = "zfkd",
x_factor = 1-freq) %>%
select(-freq) %>%
#now add the same for seer
bind_rows({d_share_sublung %>%
filter(reg.1 == "seer") %>%
select(p_sex.1, t_histgroupiarc.1) %>%
#create summary table above
gtsummary::tbl_summary(.,
by = p_sex.1,
statistic = list(
gtsummary::all_categorical() ~ "{p}"
),
digits = gtsummary::all_categorical() ~ 3,
) %>%
#extract data
gtsummary::as_tibble() %>%
rename(t_histgroupiarc = contains("Characteristic"),
Female = contains("Female"),
Male = contains("Male", ignore.case = FALSE)
) %>%
filter(t_histgroupiarc != "IARC Histology groups (Morphology ICD-O-3 based recoding IARC 'histologically different' groups)") %>%
pivot_longer(!t_histgroupiarc, names_to = "sex", values_to = "freq") %>%
mutate(freq = as.numeric(freq) / 100,
reg = "seer",
x_factor = 1-freq) %>%
select(-freq)})simcalc_sir_n_sum_lc <- function(data, race_var = "p_race.1", histgroup_var = "t_histgroupiarc.1",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc,
histfreq_df = d_histfreq, sir_real = 1.0, sum_histgroup = TRUE){
res_sir <- data %>%
#1: count all SPC in dataset
tidytable::mutate(count_spc = case_when(p_spc == "SPC developed" & t_lung.2 == 1 ~ 1,
.default = 0)) %>%
#2: calculate SIR
msSPChelpR::sir_byfutime(., dattype = NULL,
ybreak_vars = c("reg.1"),
xbreak_var = histgroup_var, futime_breaks = c(.5, 1, 3, 5, 10, Inf),
count_var = "count_spc", refrates_df = refrates_used,
calc_total_row = FALSE, calc_total_fu = TRUE,
region_var = "p_region.1", age_var = "t_agegroupdiag.1", sex_var = "p_sex.1",
year_var = "t_yeardiag.1", race_var = race_var, site_var = "t_sitewhogen.2",
futime_var = "p_futimeyrs.1",
alpha = 0.05
) %>%
tidytable::filter(t_site %in% "Lung and Bronchus")
#create temporary objects with error messages from sir_byfutime function
prob_sir_pyar <- attr(res_sir, "problems_pyar")
prob_sir_not_empty <- attr(res_sir, "problems_not_empty")
prob_sir_missing_ref_strata <- attr(res_sir, "problems_missing_ref_strata")
prob_sir_missing_futime <- attr(res_sir, "problems_missing_futime")
prob_sir_missing_count_strata <- attr(res_sir, "problems_missing_count_strata")
prob_sir_missing_fu_strata <- attr(res_sir, "problems_missing_fu_strata")
prob_sir_duplicate_ref_strata <- attr(res_sir, "problems_duplicate_ref_strata")
prob_sir_notes_refcases <- attr(res_sir, "notes_refcases")
#3: check that no unexpected problems occurred
testthat::test_that(
"Check that no unexpected problems occurred in SIR results",
testthat::expect_true(
is.null(prob_sir_duplicate_ref_strata) &
is.null(prob_sir_missing_count_strata) &
is.null(prob_sir_missing_fu_strata) &
is.null(prob_sir_missing_futime) &
is.null(prob_sir_not_empty) &
is.null(prob_sir_pyar) &
is.null(prob_sir_notes_refcases)
)
)
#check missing_refrates
if(!is.null(prob_sir_missing_ref_strata)){
testthat::test_that(
"Check that no missing refrates occurred for lung cancer",
testthat::expect_equal(
0,
prob_sir_missing_ref_strata %>%
filter(t_site == "Lung and Bronchus") %>%
nrow()
)
)
}
#4: assign factors for same site histology and recalculate O by E*factor*sir_real
mod_sir <- res_sir %>%
left_join(histfreq_df, by = join_by(sex == sex, reg == reg, xvar_label == t_histgroupiarc)) %>%
mutate(observed = expected * x_factor * sir_real) %>%
select(-x_factor)
#5: aggregate results
sum_xbreak_var <- if(histgroup_var == "none"){"none"}else{"xvar_name"}
sum_sir <- mod_sir %>%
msSPChelpR::summarize_sir_results(.,
summarize_groups = c("region", "age", "year",
if(!is.null(race_var)){"race"}),
summarize_site = FALSE,
output = "long", output_information = "reduced",
add_total_row = "no", add_total_fu = "end",
collapse_ci = FALSE, shorten_total_cols = TRUE,
fubreak_var_name = "fu_time", ybreak_var_name = "yvar_name",
xbreak_var_name = sum_xbreak_var, site_var_name = "t_site",
alpha = 0.05
)
#add column with histology of index LC
if(histgroup_var == "none"){
sum_sir <- sum_sir %>%
tidytable::mutate(t_sublung.1 = "Total - All lung cancers", .before = age)
} else{
if(sum_histgroup == FALSE){
sum_sir <- sum_sir %>%
tidytable::mutate(t_sublung.1 = xvar_label, .before = age) %>%
#remove xvar columns if xbreak_var is used
tidytable::select(-xvar_name, -xvar_label)
}
}
#add Total by sex, reg, fu_time
if(sum_histgroup){
sum_sir <- sum_sir %>%
tidytable::summarize(tidytable::across(
.cols = c(observed, expected, pyar, n_base),
.fns = ~ sum(.x, na.rm = TRUE),
.names = "group_{.col}") ,
.by = c(age, region, sex, year, race, yvar_name, yvar_label, yvar_sort, yvar_sort_levels,
fu_time, fu_time_sort, t_site)) %>%
#calculate sir
tidytable::mutate(
sir_real = sir_real,
sir = .data$group_observed / .data$group_expected,
sir_lci = (stats::qchisq(p = alpha / 2, df = 2 * .data$group_observed) / 2) / .data$group_expected,
sir_uci = (stats::qchisq(p = 1 - alpha / 2, df = 2 * (.data$group_observed + 1)) / 2) / .data$group_expected,
) %>%
tidytable::arrange(sex, yvar_sort_levels, fu_time_sort) %>%
tidytable::select(sex, reg = yvar_label, sir_real, fu_time, observed = group_observed, expected = group_expected, sir, sir_lci, sir_uci, pyar = group_pyar, n_base = group_n_base) %>%
tidytable::mutate(tidytable::across(.cols = c(pyar, sir, sir_lci, sir_uci),
.fns = ~ round(.x, 2)))
}
#add detailed results as attribute to sum_sir
attr(sum_sir, "res_sir") <- res_sir
sum_sir
}refrates_tmp_methods_lcsubtype_dco_lc <- refrates_methods_lcsubtype_histgroupiarc_dco %>%
filter(t_lcsubtype == "Total - All histological subtypes")
res_sum_sim2_sir <- d1_lung_wide %>%
simcalc_sir_n_sum_lc(sir_real = 1.0) %>%
bind_rows(d1_lung_wide %>% simcalc_sir_n_sum_lc(sir_real = 0.5)) %>%
bind_rows(d1_lung_wide %>% simcalc_sir_n_sum_lc(sir_real = 2.0)) %>%
bind_rows(d1_lung_wide %>% simcalc_sir_n_sum_lc(sir_real = 3.38)) %>%
bind_rows(d1_lung_wide %>% simcalc_sir_n_sum_lc(sir_real = 4.85))[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 31s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 26s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 20s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 14s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 9s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🥳
Test passed 🎊
[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 30s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 25s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 21s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 14s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🌈
Test passed 🥇
[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 30s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 25s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 20s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 14s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🎊
Test passed 😀
[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 29s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 25s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 20s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 14s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🌈
Test passed 🎊
[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 29s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 25s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 20s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 14s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🥇
Test passed 😀
testthat::test_that(
"Function `simcalc_sir_n_sum_lc()` works correctly",
testthat::expect_equal(
{d_histfreq_tmp <- d_histfreq %>% mutate(x_factor = 1.0)
d1_lung_wide %>%
simcalc_sir_n_sum_lc(.,
race_var = "p_race.1",
histgroup_var = "t_histgroupiarc.1",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc,
histfreq_df = d_histfreq_tmp,
sir_real = 2.0,
) %>%
pull(sir) %>%
unique()
},
2
)
)Test passed 🎊
Test passed 🥇
Test passed 😸
tab2 <- res_sum_sim2_sir %>%
filter(reg == "zfkd") %>%
filter(fu_time == "Total 0.5 to Inf years") %>%
select(-observed) %>%
pivot_wider(names_from = c(sir_real),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci")),
names_sep = "_") %>%
#columns to add the plot
mutate(plot_sir_1 = sir_1, .after = sir_1) %>%
mutate(plot_sir_2 = sir_2, .after = sir_2) %>%
mutate(plot_sir_3.38 = sir_3.38, .after = sir_3.38) %>%
mutate(plot_sir_4.85 = sir_4.85, .after = sir_4.85) %>%
#columns for target value
mutate(target_1 = 1, .after = plot_sir_1) %>%
mutate(target_2 = 2, .after = plot_sir_2) %>%
mutate(target_3.38 = 3.38, .after = plot_sir_3.38) %>%
mutate(target_4.85 = 4.85, .after = plot_sir_4.85) #adapted from gtExtras::gt_plt_bullet
gt_plt_bullet_mod <- function(gt_object, column = NULL, target = NULL, width = 65,
xlim = c(0, max(c(all_vals, target_vals), na.rm = TRUE)),
palette = c("grey", "red"), palette_col = NULL, background = "grey") {
stopifnot("'gt_object' must be a 'gt_tbl', have you accidentally passed raw data?" = "gt_tbl" %in% class(gt_object))
stopifnot("'palette' must be 2 colors" = length(palette) == 2)
# extract the values from specified columns
all_vals <- gtExtras::gt_index(gt_object, {{ column }})
target_vals <- gtExtras::gt_index(gt_object, {{ target }})
if(length(all_vals) == 0) {
return(gt_object)
}
rng_val <- range(c(all_vals, target_vals), na.rm = TRUE)
length_val <- length(all_vals)
col_bare <- gtExtras::gt_index(gt_object, {{ column }}, as_vector = FALSE) %>%
dplyr::select({{ column }}) %>%
names()
if(!rlang::quo_is_null(rlang::enquo(palette_col))) {
bar_pal <- gtExtras::gt_index(gt_object, {{ palette_col }})
tar_pal <- rep(palette[2], length(bar_pal))
} else {
tar_pal <- rep(palette[2], length_val)
bar_pal <- rep(palette[1], length_val)
}
tab_out <- gt_object %>%
text_transform(
locations = cells_body({{ column }}),
fn = function(x) {
bar_fx <- function(vals, target_vals, tar_pal, bar_pal) {
if(is.na(vals) | is.null(vals)) {
return("<div></div>")
}
if(is.na(target_vals)) {
stop("Target Column not coercible to numeric, please create and supply an unformatted column ahead of time with gtExtras::gt_duplicate_columns()",
call. = FALSE
)
}
if(is.na(vals)) {
stop("Column not coercible to numeric, please create and supply an unformatted column ahead of time with gtExtras::gt_duplicate_columns()",
call. = FALSE
)
}
plot_out <- ggplot(data = NULL, aes(x = vals, y = factor("1"))) +
geom_col(width = 0.1, color = bar_pal, fill = bar_pal) +
geom_vline(
xintercept = target_vals, color = tar_pal, linewidth = 1.5,
alpha = 0.7
) +
geom_vline(xintercept = 0, color = "black", linewidth = 1) +
theme_void() +
coord_cartesian(xlim = xlim) +
#change expansion to 0
scale_x_continuous(expand = expansion(mult = c(0, 0))) +
scale_y_discrete(expand = expansion(mult = c(0.0, 0.0))) +
theme_void() +
theme(
legend.position = "none",
plot.margin = margin(0, 0, 0, 0, "pt"),
#add background color
plot.background = element_rect(
fill = background,
colour = background,
linewidth = NULL,
linetype = NULL),
panel.background = element_blank()
)
out_name <- file.path(tempfile(
pattern = "file", tmpdir = tempdir(),
fileext = ".svg"
))
ggsave(out_name,
plot = plot_out, dpi = 25.4, height = 5, width = width,
units = "mm", device = "svg"
)
img_plot <- readLines(out_name) %>%
paste0(collapse = "") %>%
gt::html()
on.exit(file.remove(out_name), add = TRUE)
img_plot
}
tab_built <- mapply(bar_fx, all_vals, target_vals, tar_pal, bar_pal)
tab_built
}
) %>%
gt::cols_align(align = "left", columns = {{ column }})
if(!rlang::quo_is_null(rlang::enquo(palette_col))) {
tab_out %>%
gt::cols_hide({{ palette_col }})
} else {
tab_out
}
}Plan for main analyses:
calc_sir_n_sum_lc <- function(data, race_var, xbreak_var, site_var = "t_sitewhogen.2", keep_t_site, refrates_used){
res_sir <- data %>%
#1: count all SPC in dataset
tidytable::mutate(count_spc = case_when(
p_spc == "SPC developed" & t_sitewhogen.2 == "Lung and Bronchus" ~ 1,
.default = 0)) %>%
#2: calculate SIR
msSPChelpR::sir_byfutime(., dattype = NULL,
ybreak_vars = c("reg.1"),
xbreak_var = xbreak_var, futime_breaks = c(.5, 1, 3, 5, 10, Inf),
count_var = "count_spc", refrates_df = refrates_used,
calc_total_row = FALSE, calc_total_fu = TRUE,
region_var = "p_region.1", age_var = "t_agegroupdiag.1", sex_var = "p_sex.1",
year_var = "t_yeardiag.1", race_var = race_var, site_var = site_var,
futime_var = "p_futimeyrs.1",
alpha = 0.05
) %>%
tidytable::filter(t_site %in% keep_t_site)
#create temporary objects with error messages from sir_byfutime function
prob_sir_pyar <- attr(res_sir, "problems_pyar")
prob_sir_not_empty <- attr(res_sir, "problems_not_empty")
prob_sir_missing_ref_strata <- attr(res_sir, "problems_missing_ref_strata")
prob_sir_missing_futime <- attr(res_sir, "problems_missing_futime")
prob_sir_missing_count_strata <- attr(res_sir, "problems_missing_count_strata")
prob_sir_missing_fu_strata <- attr(res_sir, "problems_missing_fu_strata")
prob_sir_duplicate_ref_strata <- attr(res_sir, "problems_duplicate_ref_strata")
prob_sir_notes_refcases <- attr(res_sir, "notes_refcases")
#3: check that no unexpected problems occurred
if(site_var == "t_sitewhogen.2"){
testthat::test_that(
"Check that no unexpected problems occurred in SIR results",
testthat::expect_true(
is.null(prob_sir_duplicate_ref_strata) &
is.null(prob_sir_missing_count_strata) &
is.null(prob_sir_missing_fu_strata) &
is.null(prob_sir_missing_futime) &
is.null(prob_sir_not_empty) &
is.null(prob_sir_pyar) &
is.null(prob_sir_notes_refcases)
)
)}else{
testthat::test_that(
"Check that no unexpected problems occurred in SIR results",
testthat::expect_true(
is.null(prob_sir_duplicate_ref_strata) &
is.null(prob_sir_missing_count_strata) &
is.null(prob_sir_missing_fu_strata) &
is.null(prob_sir_missing_futime) &
is.null(prob_sir_not_empty) &
is.null(prob_sir_pyar)
)
)
}
#check missing_refrates
if(!is.null(prob_sir_missing_ref_strata) & site_var == "t_sitewhogen.2"){
testthat::test_that(
"Check that no missing refrates occurred for lung cancer",
testthat::expect_equal(
0,
prob_sir_missing_ref_strata %>%
filter(t_site == "Lung and Bronchus") %>%
nrow()
)
)
}
#check notes_refcases
if(!is.null(prob_sir_notes_refcases)){
testthat::test_that(
"Check that notes refrates are singular for long follow-up times (more than 5 years, i.e. fu_time_sort >= 4",
testthat::expect_equal(
0,
prob_sir_notes_refcases %>%
filter((i_observed > 1 | fu_time_sort < 3)) %>%
nrow()
)
)
}
sum_xbreak_var <- if(xbreak_var == "none"){"none"}else{"xvar_name"}
#4: aggregate results
sum_sir <- res_sir %>%
msSPChelpR::summarize_sir_results(.,
summarize_groups = c("region", "age", "year",
if(!is.null(race_var)){"race"}),
summarize_site = FALSE,
output = "long", output_information = "reduced",
add_total_row = "no", add_total_fu = "end",
collapse_ci = FALSE, shorten_total_cols = TRUE,
fubreak_var_name = "fu_time", ybreak_var_name = "yvar_name",
xbreak_var_name = sum_xbreak_var, site_var_name = "t_site",
alpha = 0.05
)
#add column with histology of index LC
if(xbreak_var == "none"){
sum_sir <- sum_sir %>%
tidytable::mutate(t_lcsubtype = "Total - All lung cancers", .before = age)
} else{
sum_sir <- sum_sir %>%
tidytable::mutate(t_lcsubtype = xvar_label, .before = age) %>%
#remove xvar columns if xbreak_var is used
tidytable::select(-xvar_name, -xvar_label)
}
#add detailed results as attribute to sum_sir
attr(sum_sir, "res_sir") <- res_sir
sum_sir
}#extract res_sir from sum_sir objects created by calc_sir_n_sum_lc()
extract_res_sir <- function(sum_sir){
attributes(sum_sir)$res_sir
}#create wrapper function that calculates aggregated SIR by LC subtype
calc_sir_sublung <- function(histo, wide_df, ref_df, race_var, ybreak_vars = c("reg.1"), xbreak_var = "none", site_var = "t_sublung", version = c("A_histo_specific", "B_any_other_histo", "C_any_other_histo2"), quiet = FALSE){
site_var1 <- rlang::sym(paste0(site_var, ".1"))
site_var2 <- rlang::sym(paste0(site_var, ".2"))
#for version A, keep value of histologic subtype; for version B/C, we need to match to all other subtypes ("excluding histo")
site_var_df_match <- if(version == "A_histo_specific"){rlang::as_name(site_var2)}else{
if(version %in% c("B_any_other_histo", "C_any_other_histo2")){"t_sub2_excl"}}
#determine valid sites for Lung cancer
valid_sites1 <- wide_df %>% filter(t_lung.1 == 1) %>% distinct(!!site_var1) %>% pull() %>% as.character() %>% sort()
valid_sites2 <- wide_df %>% filter(t_lung.2 == 1) %>% distinct(!!site_var2) %>% pull() %>% as.character() %>% sort()
valid_sites <- c(valid_sites1, valid_sites2) %>% unique() %>% sort()
cli::cli_progress_message(paste0("Calculating SIR for LC: ", histo))
#0: if version B, we need to calc t_sub2_excl
if(version %in% c("B_any_other_histo", "C_any_other_histo2")){
wide_df <- wide_df %>%
tidytable::mutate(t_sub2_excl = case_when(
!!site_var2 == !!site_var1 & !!site_var1 == histo ~ !!site_var2,
!!site_var2 != !!site_var1 ~ paste("excluding", histo),
TRUE ~ NA_character_
))
}
#depending on version, determine histology codes to keep in SIR results
keep_hist_sir <- if(version == "A_histo_specific"){wide_df %>% distinct(!!site_var1) %>% pull() %>% as.character() %>% sort()}else{
if(version %in% c("B_any_other_histo", "C_any_other_histo2")){c(histo, paste("excluding", histo))}}
#1: filter ref_df
ref_df <- ref_df %>%
tidytable::select(-t_site) %>%
tidytable::rename(t_site = t_lcsubtype)
#in Version C, mutate t_site in ref_df so that everything behind [[ ignored
if(version %in% c("C_any_other_histo2")){
ref_df <- ref_df %>%
tidytable::mutate(t_site = sub(" \\[\\[.*$", "", t_site))
}
if(!quiet){
res_sir <- wide_df %>%
tidylog::filter(!!site_var1 == histo)
}else{
res_sir <- wide_df %>%
tidytable::filter(!!site_var1 == histo)
}
#2: calculate results
if(version %in% c("A_histo_specific", "B_any_other_histo")){
res_sir <- res_sir %>%
#count all SPC in dataset
mutate(count_spc = case_when(
p_spc == "SPC developed" & t_sitewhogen.2 == "Lung and Bronchus" ~ 1,
.default = 0))
}else{
if(version == "C_any_other_histo2"){
res_sir <- res_sir %>%
#count all SPC in dataset
mutate(count_spc = case_when(
p_spc == "SPC developed" & t_sitewhogen.2 == "Lung and Bronchus" ~ 1 &
#only count SPC in different t_histgroupiarc
t_histgroupiarc.2 != t_histgroupiarc.1,
.default = 0))
}
}
res_sir <- res_sir %>%
msSPChelpR::sir_byfutime(., dattype = NULL,
ybreak_vars = ybreak_vars,
xbreak_var = xbreak_var, futime_breaks = c(.5, 1, 3, 5, 10, Inf),
count_var = "count_spc", refrates_df = ref_df,
calc_total_row = FALSE, calc_total_fu = TRUE,
region_var = "p_region.1", age_var = "t_agegroupdiag.1", sex_var = "p_sex.1",
year_var = "t_yeardiag.1", race_var = race_var, site_var = site_var_df_match,
futime_var = "p_futimeyrs.1",
alpha = 0.05,
quiet = quiet
) %>%
tidytable::filter(t_site %in% keep_hist_sir)
#create temporary objects with error messages from sir_byfutime function
prob_sir_pyar <- attr(res_sir, "problems_pyar")
prob_sir_not_empty <- attr(res_sir, "problems_not_empty")
prob_sir_missing_ref_strata <- attr(res_sir, "problems_missing_ref_strata")
prob_sir_missing_futime <- attr(res_sir, "problems_missing_futime")
prob_sir_missing_count_strata <- attr(res_sir, "problems_missing_count_strata")
prob_sir_missing_fu_strata <- attr(res_sir, "problems_missing_fu_strata")
prob_sir_duplicate_ref_strata <- attr(res_sir, "problems_duplicate_ref_strata")
prob_sir_notes_refcases <- attr(res_sir, "notes_refcases")
#3: check that no unexpected problems occurred
testthat::test_that(
"Check that no unexpected problems occurred in SIR results",
testthat::expect_true(
is.null(prob_sir_duplicate_ref_strata) &
is.null(prob_sir_missing_count_strata) &
is.null(prob_sir_missing_fu_strata) &
is.null(prob_sir_missing_futime) &
is.null(prob_sir_not_empty) &
is.null(prob_sir_pyar)
)
)
#check missing_refrates
if(!is.null(prob_sir_missing_ref_strata) && site_var == "t_sublung"){
testthat::test_that(
"Check that missing ref_strata only occur for irrelevant strata in combination of mismatching sublung variables, e.g. t_sublung.1 contains excluded sarcoma, although data has been filtered for t_sublungiarc.1",
testthat::expect_equal(
0, prob_sir_missing_ref_strata %>%
filter(t_site != "Excluded - sarcoma") %>%
nrow()
)
)
}
if(!is.null(prob_sir_missing_ref_strata) && !(site_var %in% c("t_sublung"))){
testthat::test_that(
"Check that missing ref_strata only occur for irrelevant strata in combination of LC with site_var",
testthat::expect_equal(
0, prob_sir_missing_ref_strata %>%
filter(t_site %in% valid_sites) %>%
nrow()
)
)
}
#check notes_refcases
if(!is.null(prob_sir_notes_refcases) && site_var != "t_hist"){
testthat::test_that(
"Check that notes refrates are singular for long follow-up times (more than 5 years, i.e. fu_time_sort >= 4",
testthat::expect_equal(
0,
prob_sir_notes_refcases %>%
filter((i_observed > 1 | fu_time_sort < 4) &
#exclude specifically checked strata
!(age == "70 - 74" & region == "SEER Reg 21 - Hawaii" &
sex == "Female" & t_site %in% c("Large cell carcinoma")) &
!(age == "60 - 64" & region == "SEER Reg 22 - Iowa" &
sex == "Male" & t_site %in% c("Squamous cell carcinoma", "Squamous carcinomas", "8070")) &
!(age == "75 - 79" & region == "SEER Reg 31 - San Jose-Monterey" &
sex == "Male" & t_site %in% c("Unspecified types of cancer")) &
!(age == "70 - 74" & region == "SEER Reg 35 - Los Angeles" &
sex == "Female" & t_site == "Carcinoid") &
!(age == "55 - 59" & region == "SEER Reg 37 - Rural Georgia" &
sex == "Female" & t_site %in% c("Squamous cell carcinoma", "Squamous carcinomas", "8070")) &
!(age == "60 - 64" & region == "SEER Reg 37 - Rural Georgia" &
sex == "Male" & t_site %in% c("Unspecified types of cancer")) &
!(age == "60 - 64" & region == "SEER Reg 42 - Kentucky" &
sex == "Female" & t_site %in% c("Squamous cell carcinoma", "Squamous carcinomas", "8804", "8070")) &
!(age == "65 - 69" & region == "SEER Reg 42 - Kentucky" &
sex == "Female" & t_site %in% c("Large cell carcinoma", "8972"))
) %>%
nrow()
)
)
}
res_sir %>%
#3: aggregate results
msSPChelpR::summarize_sir_results(.,
summarize_groups = c("region", "age", "year",
if(!is.null(race_var)){"race"}),
summarize_site = FALSE,
output = "long", output_information = "reduced",
add_total_row = "no", add_total_fu = "end",
collapse_ci = FALSE, shorten_total_cols = TRUE,
fubreak_var_name = "fu_time", ybreak_var_name = "yvar_name",
xbreak_var_name = if(xbreak_var == "none"){"none"}else{"xvar_name"},
site_var_name = "t_site",
alpha = 0.05
) %>%
#add column with histology of index LC
mutate(!!site_var1 := histo, .before = age)
}sum_sir_results_sum <- function(sum_df, remaining_by_vars){
if(!is.character(remaining_by_vars)){
rlang::abort("argument `remaining_by_vars` must be character vector")
}
if(!("fu_time" %in% remaining_by_vars)){
if("fu_time" %in% colnames(sum_df)){
sum_df <- sum_df %>%
tidytable::filter(!str_detect(fu_time, "^Total"))
rlang::inform(c(
"`fu_time`is not among `remaining_by_vars.` This function has filtered out totals before aggregating",
" "))
}else{rlang::inform(c(
"`fu_time`is not among `remaining_by_vars` and cannot be found among `colnames(sum_df)`.",
"Make sure that you have filtered out totals if you want to aggregate multiple follow-up times",
" "))
}
fu_sum <- TRUE
}else{
fu_sum <- FALSE
}
if(!("yvar_label" %in% remaining_by_vars)){
yvar_sum <- TRUE
}else{
yvar_sum <- FALSE
}
if(!("t_site" %in% remaining_by_vars)){
rlang::inform(c("`t_site`is not among remaining_by_vars",
"Check pyar calculations!",
" "))
site_sum <- TRUE
}else{
site_sum <- FALSE
}
sum_df %>%
tidytable::summarize(
observed = sum(observed),
expected = sum(expected),
pyar = tidytable::case_when(site_sum == FALSE ~ sum(pyar),
site_sum == TRUE ~ first(pyar)),
fu_time_sort = tidytable::case_when(fu_sum == FALSE ~ first(fu_time_sort),
fu_sum == TRUE ~ 999),
yvar_name = tidytable::case_when(yvar_sum == FALSE ~ first(yvar_name),
yvar_sum == TRUE ~ first(paste("Total", yvar_name))),
yvar_sort = tidytable::case_when(yvar_sum == FALSE ~ first(yvar_sort),
yvar_sum == TRUE ~ 999),
yvar_sort_levels = tidytable::case_when(yvar_sum == FALSE ~ first(yvar_sort_levels),
yvar_sum == TRUE ~ 999),
.by = tidyselect::all_of(remaining_by_vars)) %>%
#calculate sir
tidytable::mutate(
sir = .data$observed / .data$expected,
sir_lci = (stats::qchisq(p = alpha / 2, df = 2 * .data$observed) / 2) / .data$expected,
sir_uci = (stats::qchisq(p = 1 - alpha / 2, df = 2 * (.data$observed + 1)) / 2) / .data$expected,
) %>%
mutate(across(
.cols = c(sir, sir_lci, sir_uci),
.fns = ~round(.x, 2)
)) %>%
relocate(sir, sir_lci, sir_uci, .after = expected)
}# data <- "d1_lung_wide"
# race_var <- "p_race.1"
# keep_hist_sir <- "Lung and Bronchus"
# refrates_used <- "refrates_methods_lcsubtype_histgroupiarc_dco" #only t_lsubtype == "Total - All histological subtypes"
refrates_tmp_methods_lcsubtype_dco_lc <- refrates_methods_lcsubtype_histgroupiarc_dco %>%
filter(t_lcsubtype == "Total - All histological subtypes")
res_sum_sir1_raw <- d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "none",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#then bind rates stratified by histological subtype of first LC
bind_rows({
d1_lung_wide %>%
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "t_sublungiarcgroup.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc)
}) %>%
#add method
mutate(method = "sir1_raw") %>%
rename(t_sublungiarcgroup.1 = t_lcsubtype)res_sum_sir1_raw_byreg <- d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_region.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 1m
Calculating SIR ■■■■■■■■■■ 29% | ETA: 1m
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 1m
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 40s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 23s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 12s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🎉
Test passed 🌈
res_sum_sir1_raw_byage <- d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_agefcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 18s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 15s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 13s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 9s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🥇
Test passed 🌈
res_sum_sir1_raw_byyear <- d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_yearfcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 11s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 9s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🌈
Test passed 🥇
splc_subtypes <- c(unique(d1_lung_wide$t_histgroupiarc.2))
refrates_tmp_methods_lcsubtype_histgroupiarc_dco <- refrates_methods_lcsubtype_histgroupiarc_dco %>%
mutate(t_site = t_lcsubtype) %>%
filter(t_site %in% splc_subtypes)
res_sum_sir1_raw_bysplctype <- d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
xbreak_var = "none",
race_var = "p_race.1",
site_var = "t_histgroupiarc.2",
keep_t_site = splc_subtypes,
refrates_used = refrates_tmp_methods_lcsubtype_histgroupiarc_dco) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 32544 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
[INFO Unexpected Cases] There are observed cases in the results file that do not occur in the refrates_df.
ℹ 20 strata are affected.
A possible explanation can be:
- DCO cases or
- diagnosis of second cancer occured in different time period than first cancer
! Check attribute `notes_refcases` of results to see what strata are affected.
Test passed 😀
Test passed 🎊
#tests
testthat::test_that(
"sums match",
testthat::expect_equal(
res_sum_sir1_raw %>%
filter(t_sublungiarcgroup.1 == "Total - All lung cancers") %>%
select(age, sex, year, race, yvar_label, fu_time, observed, expected, sir, sir_lci, sir_uci) %>%
arrange(age, sex, year, race, yvar_label, fu_time),
res_sum_sir1_raw_bysplctype %>%
sum_sir_results_sum(remaining_by_vars = c("age", "sex", "year", "race", "yvar_label", "fu_time")) %>%
select(age, sex, year, race, yvar_label, fu_time, observed, expected, sir, sir_lci, sir_uci) %>%
arrange(age, sex, year, race, yvar_label, fu_time),
tolerance = 0.01
)
)Test passed 🌈
For details on calculation strategy see ?@sec-strategy-a.
For details on calculation strategy see ?@sec-strategy-b.
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
res_sum_sir2_sub_b_byreg <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_region.1",
xbreak_var = "none",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 11s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🎉
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 10s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 🎉
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 10s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 5s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 🥇
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■ 14% | ETA: 12s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🌈
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 5s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 🌈
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🥳
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🥇
#calculate totals for excluding same histgroupiarc
res_sum_sir2_sum_b_t_byreg <- res_sum_sir2_sub_b_byreg %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
res_sum_sir2_sub_b_byage <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_agefcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🎉
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 🥳
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 🌈
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 😸
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 😸
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 5s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🎊
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🎊
#calculate totals for excluding same histgroupiarc
res_sum_sir2_sum_b_t_byage <- res_sum_sir2_sub_b_byage %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
res_sum_sir2_sub_b_byyear <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_yearfcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🎊
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 🥳
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 🌈
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 😀
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 🎉
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🌈
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🌈
#calculate totals for excluding same histgroupiarc
res_sum_sir2_sum_b_t_byyear <- res_sum_sir2_sub_b_byyear %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
# data <- "d2_lung_wide_iarc"
# race_var <- "p_race.1"
# keep_hist_sir <- "Lung and Bronchus"
# refrates_used <- "refrates_methods_lcsubtype_histgroupiarc_iarc_dco" #only t_lsubtype == "Total - All histological subtypes"
refrates_tmp_methods_lcsubtype_iarc_dco_lc <- refrates_methods_lcsubtype_histgroupiarc_iarc_dco %>%
filter(t_lcsubtype == "Total - All histological subtypes")
res_sum_sir3_iarc <- d2_lung_wide_iarc %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "none",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc) %>%
#then bind rates stratified by histological subtype of first LC
bind_rows({d2_lung_wide_iarc %>%
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "t_sublungiarcgroup.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc)
}) %>%
#add method
mutate(method = "sir3_iarc") %>%
rename(t_sublungiarcgroup.1 = t_lcsubtype)res_sum_sir3_iarc_byreg <- d2_lung_wide_iarc %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_region.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc) %>%
#add method
mutate(method = "sir3_iarc")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 1m
Calculating SIR ■■■■■■■■■■ 29% | ETA: 1m
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 1m
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 41s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 24s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 12s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244260 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 😸
Test passed 😀
res_sum_sir3_iarc_byage <- d2_lung_wide_iarc %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_agefcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc) %>%
#add method
mutate(method = "sir3_iarc")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 17s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 15s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 12s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244260 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🎊
Test passed 😀
res_sum_sir3_iarc_byyear <- d2_lung_wide_iarc %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_yearfcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc) %>%
#add method
mutate(method = "sir3_iarc")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 11s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 9s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244260 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 😸
Test passed 🥳
#process results
res_sum_sir4_sum_a <- res_sum_sir4_subiarc_a %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "t_site", "fu_time")) %>%
rename(t_sublungiarcgroup.1 = xvar_label)
res_sum_sir4_sum_b <- res_sum_sir4_subiarc_b %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
rename(t_sublungiarcgroup.1 = xvar_label) `t_site`is not among remaining_by_vars
• Check pyar calculations!
•
res_sum_sir4_sum_b_t <- res_sum_sir4_subiarc_b %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#combine rates into one result file
res_sum_sir4_subiarc <- res_sum_sir4_sum_a %>%
bind_rows(res_sum_sir4_sum_b) %>%
bind_rows(res_sum_sir4_sum_b_t) %>%
#add method
mutate(method = "sir4_subiarc")
rm(res_sum_sir4_sum_a, res_sum_sir4_sum_b)
#tests
testthat::test_that(
"No double entries should be in results",
testthat::expect_equal(
res_sum_sir4_subiarc %>% nrow,
res_sum_sir4_subiarc %>% distinct(t_sublungiarcgroup.1, sex, yvar_label, fu_time, t_site) %>% nrow
))Test passed 😸
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
res_sum_sir4_sub_b_byreg <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = d2_lung_wide_iarc,
ref_df = refrates_methods_lcsubtype_histgroupiarc_iarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_region.1",
xbreak_var = "none",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 10s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 5s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 😸
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 10s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 🌈
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 10s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 5s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 🌈
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■ 14% | ETA: 12s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🎊
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 10s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 🎊
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■ 14% | ETA: 13s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 5s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🎉
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 12s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🎊
#calculate totals for excluding same histgroupiarc
res_sum_sir4_sum_b_t_byreg <- res_sum_sir4_sub_b_byreg %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir4_subiarc")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
res_sum_sir4_sub_b_byage <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = d2_lung_wide_iarc,
ref_df = refrates_methods_lcsubtype_histgroupiarc_iarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_agefcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🥳
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 😸
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 🎉
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🎉
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 😀
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 😀
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🥳
#calculate totals for excluding same histgroupiarc
res_sum_sir4_sum_b_t_byage <- res_sum_sir4_sub_b_byage %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir4_subiarc")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
res_sum_sir4_sub_b_byyear <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = d2_lung_wide_iarc,
ref_df = refrates_methods_lcsubtype_histgroupiarc_iarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_yearfcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🥳
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 🥇
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 0s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 😀
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🥳
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 😸
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🎉
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🎊
#calculate totals for excluding same histgroupiarc
res_sum_sir4_sum_b_t_byyear <- res_sum_sir4_sub_b_byyear %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir4_subiarc")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
res_sum_sir <-
bind_rows(res_sum_sir1_raw,
res_sum_sir2_sub,
res_sum_sir3_iarc,
res_sum_sir4_subiarc) %>%
mutate(registry = yvar_label,
t_sublungiarc_sort = case_match(t_sublungiarcgroup.1,
"Total - All lung cancers" ~ "AATotal",
.default = t_sublungiarcgroup.1),
t_site_sort = case_match(t_site,
"Lung and Bronchus" ~ "AALung",
"Lung and Bronchus [excluding same histgroupiarc]" ~ "AALung",
.default = t_site)) %>%
arrange(t_sublungiarc_sort, t_site_sort, sex, fu_time_sort, registry, method) %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry, method, sir, sir_lci, sir_uci,
observed, expected, fu_time_sort)histgroups <- c("Adenocarcinoma (AC)",
"Squamous cell carcinoma (SCC)",
"Small cell carcinoma (SCLC)",
"Large cell carcinoma (LCC)",
"Other & unspecified (O&U)")
#totals from SIR1
tab3_old_pre_a <- res_sum_sir %>%
filter(
method == "sir1_raw" &
t_sublungiarcgroup.1 %in% c("Total - All lung cancers", histgroups) &
registry %in% c("seer", "zfkd") &
fu_time == "Total 0.5 to Inf years") %>%
select(t_sublungiarcgroup.1, sex, registry, fu_time, t_site, sir, sir_lci, sir_uci,
observed, expected) %>%
arrange(t_sublungiarcgroup.1, sex, registry)#Totals from SIR2 by subtype
tab3_old_pre_b <- res_sum_sir %>%
filter(
method == "sir2_sub" &
t_sublungiarcgroup.1 %in% c("Total - All lung cancers", histgroups) &
registry %in% c("seer", "zfkd") &
fu_time == "Total 0.5 to Inf years") %>%
select(t_sublungiarcgroup.1, t_site, sex, registry, fu_time, sir, sir_lci, sir_uci, observed, expected) %>%
arrange(t_sublungiarcgroup.1, t_site, sex, registry)tab3_old_pre <- tab3_old_pre_a %>%
bind_rows(tab3_old_pre_b) %>%
arrange(t_sublungiarcgroup.1, sex, registry) %>%
mutate(var_name = case_when(t_site == "Lung and Bronchus" ~ "SIR_raw",
str_detect(t_site, "excluding") ~ "SIR_sub",
.default = NA)) %>%
mutate(across(
.cols = c(sir, sir_lci, sir_uci),
.fns = ~round(.x, 2)
)) %>%
filter(!is.na(var_name))res_sum_sir_byreg <- res_sum_sir1_raw_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = t_lcsubtype, sex, method, sir, sir_lci, sir_uci, pyar, observed, n_base) %>%
bind_rows({res_sum_sir2_sum_b_t_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = yvar_label, sex, method, sir, sir_lci, sir_uci, pyar, observed)}) %>%
bind_rows({res_sum_sir3_iarc_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = t_lcsubtype, sex, method, sir, sir_lci, sir_uci, pyar, observed)})%>%
bind_rows({res_sum_sir4_sum_b_t_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = yvar_label, sex, method, sir, sir_lci, sir_uci, pyar, observed)})res_sum_sir_byage <- res_sum_sir1_raw_byage %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_agefcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort) %>%
bind_rows({res_sum_sir2_sum_b_t_byage %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_agefcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) %>%
bind_rows({res_sum_sir3_iarc_byage %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_agefcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) %>%
bind_rows({res_sum_sir4_sum_b_t_byage %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_agefcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)})res_sum_sir_byyear <- res_sum_sir1_raw_byyear %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_yearfcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort) %>%
bind_rows({res_sum_sir2_sum_b_t_byyear %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_yearfcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) %>%
bind_rows({res_sum_sir3_iarc_byyear %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_yearfcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) %>%
bind_rows({res_sum_sir4_sum_b_t_byyear %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_yearfcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)})#calculate yearly DCO rates per region
sensa_dco_per_year <- d0_lung_wide_raw %>%
summarize(
n = n(),
n_dco = sum(t_confirm.1 == "DCO", na.rm = TRUE),
n_missing = sum((t_confirm.1 == "unknown" | is.na(t_confirm.1)), na.rm = TRUE),
.by = c(p_region.1, t_singleyeardiag.1)) %>%
mutate(genlc_dco_perc = n_dco / n,
genlc_miss_perc = n_missing / n) %>%
select(p_region.1, year = t_singleyeardiag.1, genlc_dco_perc, genlc_miss_perc)
sensa_dco_per_year %>%
filter(genlc_dco_perc < 0.10) %>%
arrange(p_region.1, year) %>%
count(p_region.1)We will exclude DE2 Bavaria, DE9 Lower Saxony, DEF Schleswig-Holstein, DEG Thuringia from sensitivity analyses
sensa_lowdc_d1_lung_wide <- d1_lung_wide %>%
# sensA: filter for the following registries of the first tumor (reasonable FU of at least 5 years, GEKID recommended, < 10% DCO rate):
# - Baden-Württemberg 2009-01-15 --> exclusion reason: too short FU
# - Bavaria 2002-01-15 --> exclusion reason: DCO rate > 10%
# - Berlin 1990-01-15 --> exclusion reason: low completeness <80% in 2012
# o Brandenburg 1990-01-15 --> inclusion from 2007 when DCO < 10%
# o Bremen 1998-01-15 --> inclusion from 2004 when DCO < 10%
# o Hamburg 1990-01-15 --> inclusion from 2008 when DCO < 10%
# - Hesse 1992-07-15 --> exclusion reason: low completeness <80% in 2012
# o Mecklenburg-Western Pomerania 1990-01-15 --> inclusion 2003-2011 when DCO < 10%
# - Lower Saxony 1997-01-15 --> exclusion reason: DCO rate > 10%
# - North Rhine-Westphalia 1986-01-15 --> exclusion reason: DCO rate > 10%
# - Rhineland-Palatinate 1998-01-15 --> exclusion reason: low completeness <90% in 2012
# o Saarland 1970-01-15 --> inclusion 2002-2011 when DCO < 10%
# o Saxony 1990-01-15 --> inclusion from 2005 when DCO < 10%
# - Saxony-Anhalt 1990-01-15 --> exclusion reason: low completeness <80% in 2012
# - Schleswig-Holstein 1998-01-15 --> exclusion reason: DCO rate > 10%
# - Thuringia 1990-01-15 --> exclusion reason: DCO rate > 10%
tidylog::filter(
(p_region.1 == "DE4 Brandenburg" & t_singleyeardiag.1 >= 2007) |
(p_region.1 == "DE5 Bremen" & t_singleyeardiag.1 >= 2004) |
(p_region.1 == "DE6 Hamburg" & t_singleyeardiag.1 >= 2008) |
(p_region.1 == "DE8 Mecklenburg-Western Pomerania" & t_singleyeardiag.1 >= 2003 & t_singleyeardiag.1 <= 2011) |
(p_region.1 == "DEC Saarland" & t_singleyeardiag.1 >= 2002 & t_singleyeardiag.1 <= 2011) |
(p_region.1 == "DED Saxony" & t_singleyeardiag.1 >= 2005))filter: removed 368,054 rows (92%), 31,340 rows remaining
res_sensa_stats <- list(
pyars = sensa_lowdc_d1_lung_wide %>% summarize(pyars = sum(p_futimeyrs.1)) %>% pull(pyars),
n_lc = sensa_lowdc_d1_lung_wide %>% nrow(),
n_splc = sensa_lowdc_d1_lung_wide %>% summarize(n = sum(t_lung.2)) %>% pull(n)
) # data <- "sensa_lowdc_d1_lung_wide"
# race_var <- "p_race.1"
# keep_hist_sir <- "Lung and Bronchus"
# refrates_used <- "refrates_methods_lcsubtype_histgroupiarc_dco" #only t_lsubtype == "Total - All histological subtypes"
refrates_tmp_methods_lcsubtype_dco_lc <- refrates_methods_lcsubtype_histgroupiarc_dco %>%
filter(t_lcsubtype == "Total - All histological subtypes")
sensa_res_sum_sir1_raw <- sensa_lowdc_d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "none",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#then bind rates stratified by histological subtype of first LC
bind_rows({
sensa_lowdc_d1_lung_wide %>%
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "t_sublungiarcgroup.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc)
}) %>%
#add method
mutate(method = "sir1_raw") %>%
rename(t_sublungiarcgroup.1 = t_lcsubtype)sensa_res_sum_sir1_raw_byreg <- sensa_lowdc_d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_region.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 18497 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🎊
Test passed 😸
sensa_res_sum_sir1_raw_byage <- sensa_lowdc_d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_agefcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 18497 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 😀
Test passed 🌈
sensa_res_sum_sir1_raw_byyear <- sensa_lowdc_d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_yearfcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 18497 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🌈
Test passed 🥇
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensa_res_sum_sir2_sub_b_byreg <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensa_lowdc_d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_region.1",
xbreak_var = "none",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Test passed 🌈
Calculating SIR for LC: Other specific carcinomas
Test passed 🎉
Calculating SIR for LC: Other specified types of cancer
Test passed 🌈
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🥳
Calculating SIR for LC: Squamous carcinomas
Test passed 🎊
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🌈
Calculating SIR for LC: Unspecified types of cancer
Test passed 🎊
#calculate totals for excluding same histgroupiarc
sensa_res_sum_sir2_sum_b_t_byreg <- sensa_res_sum_sir2_sub_b_byreg %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensa_res_sum_sir2_sub_b_byage <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensa_lowdc_d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_agefcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Test passed 🌈
Calculating SIR for LC: Other specific carcinomas
Test passed 😸
Calculating SIR for LC: Other specified types of cancer
Test passed 🎉
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 😸
Calculating SIR for LC: Squamous carcinomas
Test passed 🎉
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🥇
Calculating SIR for LC: Unspecified types of cancer
Test passed 😸
#calculate totals for excluding same histgroupiarc
sensa_res_sum_sir2_sum_b_t_byage <- sensa_res_sum_sir2_sub_b_byage %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensa_res_sum_sir2_sub_b_byyear <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensa_lowdc_d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_yearfcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Test passed 🥳
Calculating SIR for LC: Other specific carcinomas
Test passed 🌈
Calculating SIR for LC: Other specified types of cancer
Test passed 😀
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🥇
Calculating SIR for LC: Squamous carcinomas
Test passed 🌈
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🥳
Calculating SIR for LC: Unspecified types of cancer
Test passed 😸
#calculate totals for excluding same histgroupiarc
sensa_res_sum_sir2_sum_b_t_byyear <- sensa_res_sum_sir2_sub_b_byyear %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
sensa_res_sum_sir <-
bind_rows(sensa_res_sum_sir1_raw,
sensa_res_sum_sir2_sub) %>%
mutate(registry = yvar_label,
t_sublungiarc_sort = case_match(t_sublungiarcgroup.1,
"Total - All lung cancers" ~ "AATotal",
.default = t_sublungiarcgroup.1),
t_site_sort = case_match(t_site,
"Lung and Bronchus" ~ "AALung",
"Lung and Bronchus [excluding same histgroupiarc]" ~ "AALung",
.default = t_site)) %>%
arrange(t_sublungiarc_sort, t_site_sort, sex, fu_time_sort, registry, method) %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry, method, sir, sir_lci, sir_uci,
observed, expected, fu_time_sort)sensa_res_sum_sir_byreg <- sensa_res_sum_sir1_raw_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = t_lcsubtype, sex, method, sir, sir_lci, sir_uci, pyar, observed, n_base) %>%
bind_rows({sensa_res_sum_sir2_sum_b_t_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = yvar_label, sex, method, sir, sir_lci, sir_uci, pyar, observed)}) sensa_res_sum_sir_byage <- sensa_res_sum_sir1_raw_byage %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_agefcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort) %>%
bind_rows({sensa_res_sum_sir2_sum_b_t_byage %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_agefcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) sensa_res_sum_sir_byyear <- sensa_res_sum_sir1_raw_byyear %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_yearfcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort) %>%
bind_rows({sensa_res_sum_sir2_sum_b_t_byyear %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_yearfcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) We will include all German patients, but only U.S. patients with p_race == from sensitivity analyses
sensb_whites_d1_lung_wide <- d1_lung_wide %>%
tidylog::filter(
reg.1 == "zfkd" |
(reg.1 == "seer" & p_race.1 == "White")
)filter: removed 44,789 rows (11%), 354,605 rows remaining
sensb_whites_d2_lung_wide_iarc <- d2_lung_wide_iarc %>%
tidylog::filter(
reg.1 == "zfkd" |
(reg.1 == "seer" & p_race.1 == "White")
)filter: removed 44,979 rows (11%), 356,030 rows remaining
res_sensb_stats <- sensb_whites_d1_lung_wide %>%
filter(t_lung.2 == 1) %>%
count(t_siteicdocat.1, reg.1) %>%
rename(n_splc_d1 = n) %>%
bind_cols({sensb_whites_d2_lung_wide_iarc %>%
filter(t_lung.2 == 1) %>%
count(t_siteicdocat.1, reg.1) %>%
select(n_splc_d2 = n)})
res_sensb_stats# data <- "sensb_whites_d1_lung_wide"
# race_var <- "p_race.1"
# keep_hist_sir <- "Lung and Bronchus"
# refrates_used <- "refrates_methods_lcsubtype_histgroupiarc_dco" #only t_lsubtype == "Total - All histological subtypes"
refrates_tmp_methods_lcsubtype_dco_lc <- refrates_methods_lcsubtype_histgroupiarc_dco %>%
filter(t_lcsubtype == "Total - All histological subtypes")
sensb_res_sum_sir1_raw <- sensb_whites_d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "none",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#then bind rates stratified by histological subtype of first LC
bind_rows({
sensb_whites_d1_lung_wide %>%
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "t_sublungiarcgroup.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc)
}) %>%
#add method
mutate(method = "sir1_raw") %>%
rename(t_sublungiarcgroup.1 = t_lcsubtype)sensb_res_sum_sir1_raw_byreg <- sensb_whites_d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_region.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 38s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 34s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 26s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 19s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 11s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 114780 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 🎊
Test passed 🎉
sensb_res_sum_sir1_raw_byage <- sensb_whites_d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_agefcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 10s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 114780 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 😸
Test passed 🌈
sensb_res_sum_sir1_raw_byyear <- sensb_whites_d1_lung_wide %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_yearfcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_dco_lc) %>%
#add method
mutate(method = "sir1_raw")[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 114780 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 😸
Test passed 🎉
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensb_res_sum_sir2_sub_b_byreg <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensb_whites_d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_region.1",
xbreak_var = "none",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 5s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🎊
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 😸
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 😸
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🎊
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 😸
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 😀
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🥇
#calculate totals for excluding same histgroupiarc
sensb_res_sum_sir2_sum_b_t_byreg <- sensb_res_sum_sir2_sub_b_byreg %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensb_res_sum_sir2_sub_b_byage <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensb_whites_d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_agefcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🥳
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 😸
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 🎉
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🎉
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 🥳
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🥇
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🌈
#calculate totals for excluding same histgroupiarc
sensb_res_sum_sir2_sum_b_t_byage <- sensb_res_sum_sir2_sub_b_byage %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensb_res_sum_sir2_sub_b_byyear <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensb_whites_d1_lung_wide,
ref_df = refrates_methods_lcsubtype_histgroupiarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_yearfcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🥳
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 🥳
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 😀
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🥇
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 😀
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 😸
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🎉
#calculate totals for excluding same histgroupiarc
sensb_res_sum_sir2_sum_b_t_byyear <- sensb_res_sum_sir2_sub_b_byyear %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir2_sub")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
# data <- "sensb_whites_d2_lung_wide_iarc"
# race_var <- "p_race.1"
# keep_hist_sir <- "Lung and Bronchus"
# refrates_used <- "refrates_methods_lcsubtype_histgroupiarc_iarc_dco" #only t_lsubtype == "Total - All histological subtypes"
refrates_tmp_methods_lcsubtype_iarc_dco_lc <- refrates_methods_lcsubtype_histgroupiarc_iarc_dco %>%
filter(t_lcsubtype == "Total - All histological subtypes")
sensb_res_sum_sir3_iarc <- sensb_whites_d2_lung_wide_iarc %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "none",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc) %>%
#then bind rates stratified by histological subtype of first LC
bind_rows({sensb_whites_d2_lung_wide_iarc %>%
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "t_sublungiarcgroup.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc)
}) %>%
#add method
mutate(method = "sir3_iarc") %>%
rename(t_sublungiarcgroup.1 = t_lcsubtype)
sensb_res_sum_sir3_iarc_byreg <- sensb_whites_d2_lung_wide_iarc %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_region.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc) %>%
#add method
mutate(method = "sir3_iarc")
sensb_res_sum_sir3_iarc_byage <- sensb_whites_d2_lung_wide_iarc %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_agefcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc) %>%
#add method
mutate(method = "sir3_iarc")
sensb_res_sum_sir3_iarc_byyear <- sensb_whites_d2_lung_wide_iarc %>%
#first overall rates for LC Total - All histological subtypes
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "p_yearfcgroup",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_tmp_methods_lcsubtype_iarc_dco_lc) %>%
#add method
mutate(method = "sir3_iarc")#process results
sensb_res_sum_sir4_sum_a <- sensb_res_sum_sir4_subiarc_a %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "t_site", "fu_time")) %>%
rename(t_sublungiarcgroup.1 = xvar_label)
sensb_res_sum_sir4_sum_b <- sensb_res_sum_sir4_subiarc_b %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
rename(t_sublungiarcgroup.1 = xvar_label) `t_site`is not among remaining_by_vars
• Check pyar calculations!
•
sensb_res_sum_sir4_sum_b_t <- sensb_res_sum_sir4_subiarc_b %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#combine rates into one result file
sensb_res_sum_sir4_subiarc <- sensb_res_sum_sir4_sum_a %>%
bind_rows(sensb_res_sum_sir4_sum_b) %>%
bind_rows(sensb_res_sum_sir4_sum_b_t) %>%
#add method
mutate(method = "sir4_subiarc")
rm(sensb_res_sum_sir4_sum_a, sensb_res_sum_sir4_sum_b)
#tests
testthat::test_that(
"No double entries should be in results",
testthat::expect_equal(
sensb_res_sum_sir4_subiarc %>% nrow,
sensb_res_sum_sir4_subiarc %>% distinct(t_sublungiarcgroup.1, sex, yvar_label, fu_time, t_site) %>% nrow
))Test passed 🎊
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensb_res_sum_sir4_sub_b_byreg <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensb_whites_d2_lung_wide_iarc,
ref_df = refrates_methods_lcsubtype_histgroupiarc_iarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_region.1",
xbreak_var = "none",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 😸
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 🎉
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 6s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 😸
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🎊
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 🥳
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■ 14% | ETA: 8s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🥇
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🎉
#calculate totals for excluding same histgroupiarc
sensb_res_sum_sir4_sum_b_t_byreg <- sensb_res_sum_sir4_sub_b_byreg %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir4_subiarc")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensb_res_sum_sir4_sub_b_byage <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensb_whites_d2_lung_wide_iarc,
ref_df = refrates_methods_lcsubtype_histgroupiarc_iarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_agefcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🎊
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 😸
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 🎊
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 🥳
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 4s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 🥇
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 0s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🎊
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🥳
#calculate totals for excluding same histgroupiarc
sensb_res_sum_sir4_sum_b_t_byage <- sensb_res_sum_sir4_sub_b_byage %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir4_subiarc")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
#create vector of varying histo
histologies <- c("Adenocarcinomas",
"Other specific carcinomas",
"Other specified types of cancer",
"Sarcomas and soft tissue tumours",
"Squamous carcinomas",
"Unspecified carcinomas (NOS)",
"Unspecified types of cancer"
)
#apply wrapper function to list of follow-up times
sensb_res_sum_sir4_sub_b_byyear <- histologies %>%
set_names() %>%
tidytable::map_dfr(.,
calc_sir_sublung,
wide_df = sensb_whites_d2_lung_wide_iarc,
ref_df = refrates_methods_lcsubtype_histgroupiarc_iarc_dco,
race_var = "p_race.1",
ybreak_vars = "p_yearfcgroup",
xbreak_var = "reg.1",
site_var = "t_histgroupiarc",
version = "C_any_other_histo2",
quiet = TRUE) Calculating SIR for LC: Adenocarcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 1s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Adenocarcinomas
Test passed 🎉
Calculating SIR for LC: Other specific carcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specific carcinomas
Test passed 🎊
Calculating SIR for LC: Other specified types of cancer
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Other specified types of cancer
Test passed 🌈
Calculating SIR for LC: Sarcomas and soft tissue tumours
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Sarcomas and soft tissue tumours
Test passed 😸
Calculating SIR for LC: Squamous carcinomas
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Squamous carcinomas
Test passed 🥇
Calculating SIR for LC: Unspecified carcinomas (NOS)
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 2s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified carcinomas (NOS)
Test passed 🎉
Calculating SIR for LC: Unspecified types of cancer
Calculating SIR ■■■■■■■■■■ 29% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
Calculating SIR for LC: Unspecified types of cancer
Test passed 🥳
#calculate totals for excluding same histgroupiarc
sensb_res_sum_sir4_sum_b_t_byyear <- sensb_res_sum_sir4_sub_b_byyear %>%
filter(str_detect(t_site, "^excluding")) %>%
sum_sir_results_sum(., remaining_by_vars = c("age", "sex", "region", "year", "race",
"yvar_label", "xvar_label", "fu_time")) %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers",
t_site = "Lung and Bronchus [excluding same histgroupiarc]") %>%
#add method
mutate(method = "sir4_subiarc")`t_site`is not among remaining_by_vars
• Check pyar calculations!
•
sensb_res_sum_sir <-
bind_rows(sensb_res_sum_sir1_raw,
sensb_res_sum_sir2_sub,
sensb_res_sum_sir3_iarc,
sensb_res_sum_sir4_subiarc) %>%
mutate(registry = yvar_label,
t_sublungiarc_sort = case_match(t_sublungiarcgroup.1,
"Total - All lung cancers" ~ "AATotal",
.default = t_sublungiarcgroup.1),
t_site_sort = case_match(t_site,
"Lung and Bronchus" ~ "AALung",
"Lung and Bronchus [excluding same histgroupiarc]" ~ "AALung",
.default = t_site)) %>%
arrange(t_sublungiarc_sort, t_site_sort, sex, fu_time_sort, registry, method) %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry, method, sir, sir_lci, sir_uci,
observed, expected, fu_time_sort)sensb_res_sum_sir_byreg <- sensb_res_sum_sir1_raw_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = t_lcsubtype, sex, method, sir, sir_lci, sir_uci, pyar, observed, n_base) %>%
bind_rows({sensb_res_sum_sir2_sum_b_t_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = yvar_label, sex, method, sir, sir_lci, sir_uci, pyar, observed)}) %>%
bind_rows({sensb_res_sum_sir3_iarc_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = t_lcsubtype, sex, method, sir, sir_lci, sir_uci, pyar, observed)})%>%
bind_rows({sensb_res_sum_sir4_sum_b_t_byreg %>%
filter(fu_time_sort == 999) %>%
select(p_region.1 = yvar_label, sex, method, sir, sir_lci, sir_uci, pyar, observed)})sensb_res_sum_sir_byage <- sensb_res_sum_sir1_raw_byage %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_agefcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort) %>%
bind_rows({sensb_res_sum_sir2_sum_b_t_byage %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_agefcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) %>%
bind_rows({sensb_res_sum_sir3_iarc_byage %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_agefcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) %>%
bind_rows({sensb_res_sum_sir4_sum_b_t_byage %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_agefcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)})sensb_res_sum_sir_byyear <- sensb_res_sum_sir1_raw_byyear %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_yearfcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort) %>%
bind_rows({sensb_res_sum_sir2_sum_b_t_byyear %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_yearfcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) %>%
bind_rows({sensb_res_sum_sir3_iarc_byyear %>%
mutate(t_sublungiarcgroup.1 = "Total - All lung cancers") %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = yvar_label,
p_yearfcgroup = t_lcsubtype, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)}) %>%
bind_rows({sensb_res_sum_sir4_sum_b_t_byyear %>%
select(t_sublungiarcgroup.1, t_site, sex, fu_time, registry = xvar_label,
p_yearfcgroup = yvar_label, method, sir, sir_lci, sir_uci, pyar, observed, expected,
fu_time_sort)})#calculate sir1_raw based on official refrates
sens_res_sum_sir1_lc <- d1_lung_wide %>%
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "none",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_lungcancer_dco_calc) %>%
bind_rows({d1_lung_wide %>%
calc_sir_n_sum_lc(.,
race_var = "p_race.1",
xbreak_var = "t_sublungiarcgroup.1",
keep_t_site = "Lung and Bronchus",
refrates_used = refrates_lungcancer_dco_calc)
}) %>%
rename(t_sublungiarcgroup.1 = t_lcsubtype)[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 7s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 😀
Test passed 🥳
[INFO Unknown Race] There are values from race missing in refrates_df.
ℹ The following values for race_var present in the data, is not availabe in refrates_df:
-> zzz_NA_explicit
For all missing reference levels of race, data will be matched to the category 'Total' in refrates_df.
! It is recommeded to clean race_var before running this function.
Calculating SIR ■■■■■ 14% | ETA: 24s
Calculating SIR ■■■■■■■■■■ 29% | ETA: 20s
Calculating SIR ■■■■■■■■■■■■■■ 43% | ETA: 16s
Calculating SIR ■■■■■■■■■■■■■■■■■■ 57% | ETA: 11s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■ 71% | ETA: 6s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■ 86% | ETA: 3s
Calculating SIR ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 100% | ETA: 0s
[INFO Refrates Missing] For some strata refrates are missing.
ℹ 244080 strata have no reference rates in `refrates_df`
- Solution could be to add these strata to `refrates_df`.
! Check attribute `problems_missing_ref_strata` of results to see what strata are affected.
Test passed 😀
Test passed 🎊
#test that results are unaltered when using overall rates from refrates_methods
waldo::compare(res_sum_sir1_raw %>% select(-method), sens_res_sum_sir1_lc)`attr(old, 'res_sir')` is absent
`attr(new, 'res_sir')` is an S3 object of class <tidytable/data.table/data.frame>, a list
old vs new
expected sir sir_lci sir_uci
- old[1, ] 130.45387865 2.33 2.08 2.61
+ new[1, ] 130.52977556 2.33 2.07 2.61
- old[2, ] 285.63595334 3.90 3.68 4.14
+ new[2, ] 285.78666238 3.90 3.68 4.14
- old[3, ] 136.94684649 8.16 7.69 8.66
+ new[3, ] 137.00444417 8.16 7.69 8.65
- old[4, ] 120.50265824 9.29 8.76 9.86
+ new[4, ] 120.55039691 9.29 8.75 9.85
- old[5, ] 9.93348199 11.88 9.83 14.23
+ new[5, ] 9.93416819 11.88 9.83 14.22
- old[6, ] 683.47281871 5.52 5.35 5.70
+ new[6, ] 683.80544721 5.52 5.35 5.70
- old[7, ] 14.74827018 1.42 0.88 2.18
+ new[7, ] 14.54675772 1.44 0.89 2.21
- old[8, ] 29.73404167 1.61 1.19 2.14
+ new[8, ] 29.30012149 1.64 1.21 2.17
- old[9, ] 13.58343450 2.87 2.04 3.92
+ new[9, ] 13.32021578 2.93 2.08 4.00
- old[10, ] 12.61810640 3.09 2.20 4.23
+ new[10, ] 12.16720222 3.21 2.28 4.38
and 134 more ...
old$expected | new$expected
[1] 130.45388 - 130.52978 [1]
[2] 285.63595 - 285.78666 [2]
[3] 136.94685 - 137.00444 [3]
[4] 120.50266 - 120.55040 [4]
[5] 9.93348 - 9.93417 [5]
[6] 683.47282 - 683.80545 [6]
[7] 14.74827 - 14.54676 [7]
[8] 29.73404 - 29.30012 [8]
[9] 13.58343 - 13.32022 [9]
[10] 12.61811 - 12.16720 [10]
... ... ... and 134 more ...
old$sir | new$sir
[4] 9.29 | 9.29 [4]
[5] 11.88 | 11.88 [5]
[6] 5.52 | 5.52 [6]
[7] 1.42 - 1.44 [7]
[8] 1.61 - 1.64 [8]
[9] 2.87 - 2.93 [9]
[10] 3.09 - 3.21 [10]
[11] 5.93 - 6.48 [11]
[12] 2.14 - 2.19 [12]
[13] 1.50 | 1.50 [13]
... ... ... and 2 more ...
old$sir | new$sir
[17] 6.10 | 6.10 [17]
[18] 3.77 | 3.77 [18]
[19] 0.48 | 0.48 [19]
[20] 0.75 - 0.76 [20]
[21] 1.04 - 1.07 [21]
[22] 1.23 - 1.27 [22]
[23] 1.75 - 1.87 [23]
[24] 0.85 - 0.87 [24]
[25] 2.60 | 2.60 [25]
[26] 4.41 | 4.41 [26]
... ... ... and 21 more ...
old$sir | new$sir
[48] 4.26 | 4.26 [48]
[49] 3.25 | 3.25 [49]
[50] 4.68 | 4.68 [50]
[51] 10.11 - 10.10 [51]
[52] 10.01 - 10.00 [52]
[53] 13.10 | 13.10 [53]
[54] 6.50 | 6.50 [54]
[55] 1.06 - 1.07 [55]
[56] 1.26 - 1.28 [56]
[57] 2.00 - 2.05 [57]
... ... ... and 30 more ...
old$sir | new$sir
[97] 1.03 | 1.03 [97]
[98] 1.77 | 1.77 [98]
[99] 4.00 | 4.00 [99]
[100] 5.96 - 5.95 [100]
[101] 5.23 | 5.23 [101]
[102] 2.68 | 2.68 [102]
[103] 0.91 | 0.91 [103]
[104] 2.73 | 2.73 [104]
[105] 5.88 - 5.87 [105]
[106] 6.97 | 6.97 [106]
... ... ... and 38 more ...
old$sir_lci | new$sir_lci
[1] 2.08 - 2.07 [1]
[2] 3.68 | 3.68 [2]
[3] 7.69 | 7.69 [3]
[4] 8.76 - 8.75 [4]
[5] 9.83 | 9.83 [5]
[6] 5.35 | 5.35 [6]
[7] 0.88 - 0.89 [7]
[8] 1.19 - 1.21 [8]
[9] 2.04 - 2.08 [9]
[10] 2.20 - 2.28 [10]
... ... ... and 17 more ...
`old$sir_lci[35:41]`: 3.77 3.39 1.23 2.14 4.87 6.65 5.69
`new$sir_lci[35:41]`: 3.77 3.39 1.23 2.13 4.87 6.65 5.69
`old$sir_lci[42:48]`: 3.71 0.79 2.45 4.84 9.11 3.37 3.70
`new$sir_lci[42:48]`: 3.71 0.79 2.45 4.83 9.11 3.37 3.70
And 6 more differences ...
res_sum_sir1_raw %>%
full_join(sens_res_sum_sir1_lc, by = c("t_sublungiarcgroup.1", "age", "region", "sex", "year", "race",
"yvar_name", "yvar_label", "yvar_sort", "yvar_sort_levels",
"fu_time", "fu_time_sort", "t_site")) %>%
mutate(diff_sir_perc = abs((sir.x - sir.y)/ sir.x),
diff_sir_abs = abs(sir.x - sir.y)) %>%
filter(diff_sir_abs > 0.1)testthat::test_that("Expect deviations in SIR smaller than 0.1 or for very small expected counts",
testthat::expect_equal(
res_sum_sir1_raw %>%
full_join(sens_res_sum_sir1_lc, by = c("t_sublungiarcgroup.1", "age", "region", "sex", "year", "race",
"yvar_name", "yvar_label", "yvar_sort", "yvar_sort_levels",
"fu_time", "fu_time_sort", "t_site")) %>%
mutate(diff_sir_perc = abs((sir.x - sir.y)/ sir.x),
diff_sir_abs = abs(sir.x - sir.y)) %>%
filter(diff_sir_abs > 0.1 & expected.x > 5) %>%
#among those remaining, only expect 10+ years strata %>%
filter(fu_time != "10+ years") %>%
#only one line should be remaining, with large SIR, where relative deviation of SIR is < 5%
filter(diff_sir_perc > 0.05) %>%
nrow(),
0
)
)Test passed 🥇
The deviations with ΔSIR > 0.1 only affect very few strata with low expected counts (few pyars -> for 10+ years of survival) and one stratum with very high SIR in ZfKD, where relative deviation of SIR is smaller than 5%.
#helper for rows with CIs
rows_ci <- c(2, 24:25)
#parameters of plot column
rh <- 70 #row height in px
#output_dir name for graphs
output_dir_name <- output_dir_tables_name
##create table with gt
#to merge variables https://gt.rstudio.com/reference/text_transform.html
tab1_pre <- tab1 %>%
#remove ASIR from years 2003-2012
dplyr::filter(!(category %in% c("ASIR in 2002 [per 100,000] (95% CI)",
"ASIR in 2003 [per 100,000] (95% CI)",
"ASIR in 2004 [per 100,000] (95% CI)",
"ASIR in 2005 [per 100,000] (95% CI)",
"ASIR in 2006 [per 100,000] (95% CI)",
"ASIR in 2007 [per 100,000] (95% CI)",
"ASIR in 2008 [per 100,000] (95% CI)",
"ASIR in 2009 [per 100,000] (95% CI)",
"ASIR in 2010 [per 100,000] (95% CI)",
"ASIR in 2011 [per 100,000] (95% CI)",
"ASIR in 2012 [per 100,000] (95% CI)"))) %>%
dplyr::select(group, variable, category, n_zfkd_Female, n_zfkd_Male, n_seer_Female, n_seer_Male, everything())
#get % of male and female
res_n_fc_perc_m_z <- tab1 %>% filter(variable == "Patients with primary LC [n (% of total)]") %>% pull(freq_zfkd_Male) %>% "*"(100) %>% round(., 1) %>% format(., nsmall = 1)
res_n_fc_perc_f_z <- tab1 %>% filter(variable == "Patients with primary LC [n (% of total)]") %>% pull(freq_zfkd_Female) %>% "*"(100) %>% round(., 1) %>% format(., nsmall = 1)
res_n_fc_perc_m_s <- tab1 %>% filter(variable == "Patients with primary LC [n (% of total)]") %>% pull(freq_seer_Male) %>% "*"(100) %>% round(., 1) %>% format(., nsmall = 1)
res_n_fc_perc_f_s <- tab1 %>% filter(variable == "Patients with primary LC [n (% of total)]") %>% pull(freq_seer_Female) %>% "*"(100) %>% round(., 1) %>% format(., nsmall = 1)
#plot table
tab1_gt <- tab1_pre %>%
#Start making gt table
gt::gt() %>%
#don't show first column and value, lci, uci
gt::cols_hide(
columns = c(group, variable,
value_zfkd_Female, value_zfkd_Male, value_seer_Female, value_seer_Male,
lci_zfkd_Female, lci_zfkd_Male, lci_seer_Female, lci_seer_Male,
uci_zfkd_Female, uci_zfkd_Male, uci_seer_Female, uci_seer_Male)
) %>%
#Column labelling
gt::cols_label(
category = md(""),
n_zfkd_Female = md(paste0("Female")),
n_zfkd_Male = md(paste0("Male")),
n_seer_Female = md(paste0("Female")),
n_seer_Male = md(paste0("Male"))
)%>%
#make col groups (spanner)
tab_spanner(
label = md("**Analysis Dataset -- Germany**<br>(ZfKD data from 11 regions)"),
columns = c(n_zfkd_Female, n_zfkd_Male)
) %>%
tab_spanner(
label = md("**Validation Dataset -- United States**<br>(SEER data from 17 regions)"),
columns = c(n_seer_Female, n_seer_Male)
) %>%
#gt: Define row groups -> careful: you need to add groups in reverse order... so bottom group first
gt::tab_row_group(
label = tab1_pre$variable[24],
rows = variable == tab1_pre$variable[24]
) %>%
gt::tab_row_group(
label = tab1_pre$variable[20],
rows = variable == tab1_pre$variable[20]
) %>%
gt::tab_row_group(
label = tab1_pre$variable[18],
rows = variable == tab1_pre$variable[18]
) %>%
gt::tab_row_group(
label = "Histology of LC",
rows = variable == tab1_pre$variable[13]
) %>%
gt::tab_row_group(
label = tab1_pre$variable[10],
rows = variable == tab1_pre$variable[10]
) %>%
gt::tab_row_group(
label = tab1_pre$variable[4],
rows = variable == tab1_pre$variable[4]
) %>%
gt::tab_row_group(
label = "Patients with primary LC (with at least 6 months of survival)",
rows = variable == tab1_pre$variable[3]
) %>%
gt::tab_row_group(
label = ifelse(en_gb, "Age-standardised incidence rate of lung cancer", "Age-standardized incidence rate of lung cancer"),
rows = variable == tab1_pre$variable[1]
) %>%
#define general number formatting by row
gt::fmt_number(
columns = c(n_zfkd_Female, n_zfkd_Male, n_seer_Female, n_seer_Male),
rows = c(3:8, 10:17, 20:23),
decimals = 0
) %>%
gt::fmt_percent(
columns = c(freq_zfkd_Female, freq_zfkd_Male, freq_seer_Female, freq_seer_Male ),
rows = c(3:8, 10:17, 20:23),
decimals = 1
) %>%
gt::cols_merge_n_pct(
col_n = c(n_zfkd_Female),
col_pct = c(freq_zfkd_Female)
) %>%
gt::cols_merge_n_pct(
col_n = c(n_zfkd_Male),
col_pct = c(freq_zfkd_Male)
) %>%
gt::cols_merge_n_pct(
col_n = c(n_seer_Female),
col_pct = c(freq_seer_Female)
) %>%
gt::cols_merge_n_pct(
col_n = c(n_seer_Male),
col_pct = c(freq_seer_Male)
) %>%
#special format plots
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Female), row = 1),
fn = function(x) {here::here(output_dir_name, "asir_zfkd_Female.png") %>% gt::local_image(height = px(rh))}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Male), row = 1),
fn = function(x) {here::here(output_dir_name, "asir_zfkd_Male.png") %>% gt::local_image(height = px(rh))}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Female), row = 1),
fn = function(x) {here::here(output_dir_name, "asir_seer_Female.png") %>% gt::local_image(height = px(rh))}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Male), row = 1),
fn = function(x) {here::here(output_dir_name, "asir_seer_Male.png") %>% gt::local_image(height = px(rh))}
) %>%
#special format with CI
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Female),
rows = rows_ci),
fn = function(x) {
paste0(sprintf("%.1f", tab1_pre$value_zfkd_Female[rows_ci]), " (", sprintf("%.1f", tab1_pre$lci_zfkd_Female[rows_ci]), " to ", sprintf("%.1f", tab1_pre$uci_zfkd_Female[rows_ci]), ")")
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Male),
rows = rows_ci),
fn = function(x) {
paste0(sprintf("%.1f", tab1_pre$value_zfkd_Male[rows_ci]), " (", sprintf("%.1f", tab1_pre$lci_zfkd_Male[rows_ci]), " to ", sprintf("%.1f", tab1_pre$uci_zfkd_Male[rows_ci]), ")")
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Female),
rows = rows_ci),
fn = function(x) {
paste0(sprintf("%.1f", tab1_pre$value_seer_Female[rows_ci]), " (", sprintf("%.1f", tab1_pre$lci_seer_Female[rows_ci]), " to ", sprintf("%.1f", tab1_pre$uci_seer_Female[rows_ci]), ")")
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Male),
rows = rows_ci),
fn = function(x) {
paste0(sprintf("%.1f", tab1_pre$value_seer_Male[rows_ci]), " (", sprintf("%.1f", tab1_pre$lci_seer_Male[rows_ci]), " to ", sprintf("%.1f", tab1_pre$uci_seer_Male[rows_ci]), ")")
}
) %>%
#special format for row FU (copy from value col and round to 1 digit)
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Female),
rows = category == "Mean follow-up [months]"),
fn = function(x) {
sprintf("%.1f", tab1_pre$value_zfkd_Female[tab1_pre$category == "Mean follow-up [months]"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Male),
rows = category == "Mean follow-up [months]"),
fn = function(x) {
sprintf("%.1f", tab1_pre$value_zfkd_Male[tab1_pre$category == "Mean follow-up [months]"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Female),
rows = category == "Mean follow-up [months]"),
fn = function(x) {
sprintf("%.1f", tab1_pre$value_seer_Female[tab1_pre$category == "Mean follow-up [months]"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Male),
rows = category == "Mean follow-up [months]"),
fn = function(x) {
sprintf("%.1f", tab1_pre$value_seer_Male[tab1_pre$category == "Mean follow-up [months]"])
}
) %>%
#special format for row Median Age (copy from value col and round to 1 digit)
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Female),
rows = category == "Median age [years]"),
fn = function(x) {
sprintf("%.1f", tab1_pre$value_zfkd_Female[tab1_pre$category == "Median age [years]"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Male),
rows = category == "Median age [years]"),
fn = function(x) {
sprintf("%.1f", tab1_pre$value_zfkd_Male[tab1_pre$category == "Median age [years]"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Female),
rows = category == "Median age [years]"),
fn = function(x) {
sprintf("%.1f", tab1_pre$value_seer_Female[tab1_pre$category == "Median age [years]"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Male),
rows = category == "Median age [years]"),
fn = function(x) {
sprintf("%.1f", tab1_pre$value_seer_Male[tab1_pre$category == "Median age [years]"])
}
) %>%
#special format for row 31 (copy from value col and round to 0 digits)
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Female),
rows = category == "Sum of PYAR"),
fn = function(x) {
sprintf("%.0f", tab1_pre$value_zfkd_Female[tab1_pre$category == "Sum of PYAR"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_zfkd_Male),
rows = category == "Sum of PYAR"),
fn = function(x) {
sprintf("%.0f", tab1_pre$value_zfkd_Male[tab1_pre$category == "Sum of PYAR"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Female),
rows = category == "Sum of PYAR"),
fn = function(x) {
sprintf("%.0f", tab1_pre$value_seer_Female[tab1_pre$category == "Sum of PYAR"])
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = c(n_seer_Male),
rows = category == "Sum of PYAR"),
fn = function(x) {
sprintf("%.0f", tab1_pre$value_seer_Male[tab1_pre$category == "Sum of PYAR"])
}
) %>%
#make header
gt::tab_header(
title = paste0("Table 1: Characteristics of ", if(en_gb){"analysed"}else{"analyzed"}," study population with primary lung cancer"),
subtitle = paste0(if(en_gb){"Age-standardised"}else{"Age-standardized"}," incidence rates of primary lung cancer (ASIR), follow-up time, characteristics of patients included in main analysis with at least 6 months of survival and absolute incidence of second primary cancer (SPC) by sex")) %>%
#footnotes
gt::tab_footnote(
footnote = "after exclusion by age and unusual histology",
locations = gt::cells_row_groups("Patients with primary LC (with at least 6 months of survival)")
) %>%
tab_source_note(
source_note = paste0(if(en_gb){"ASIR age-standardised incidence rate based on the World Standard Population 1960; "}else{"ASIR age-standardized incidence rate based on the World Standard Population 1960; "}, "DCO death-certificate only; ", "IR incidence rate; ", "LC primary lung cancer; ", "PYAR person-years at risk; ", "SPC second primary cancer; ", "SPLC second primary lung cancer")
) %>%
#special formatting
#global table options
gt::opt_row_striping() %>% #add alternating stripes
gt::tab_options(data_row.padding = px(2)) %>% # reduce row height
##column width
gt::cols_width(
category ~ px(230),
starts_with("n_") ~ px(150)
)
#save table
tab1_gt %>%
gt::gtsave(
file.path(output_dir_tables, "tab1.png"),
vwidth = 1600, expand = 20
)
tab1_gt %>%
gt::tab_header(title = NULL, subtitle = NULL) %>%
gt::tab_source_note(source_note = NULL) %>% #not working in gt v0.3
gt::tab_options(#table.width = 900,
footnotes.marks = "letters") %>%
gt::gtsave(
file.path(output_dir_tables, "tab1.rtf")
)
tab1_gt %>%
gt::tab_header(title = NULL, subtitle = NULL) %>%
gt::tab_source_note(source_note = NULL) %>% #not working in gt v0.3
gt::tab_options(#table.width = 900,
footnotes.marks = "letters") %>%
gt::gtsave(
file.path(output_dir_tables, "tab1.docx")
)
#print table
tab1_gt| Table 1: Characteristics of analyzed study population with primary lung cancer | ||||
| Age-standardized incidence rates of primary lung cancer (ASIR), follow-up time, characteristics of patients included in main analysis with at least 6 months of survival and absolute incidence of second primary cancer (SPC) by sex | ||||
|
Analysis Dataset – Germany (ZfKD data from 11 regions) |
Validation Dataset – United States (SEER data from 17 regions) |
|||
|---|---|---|---|---|
| Female | Male | Female | Male | |
| Age-standardized incidence rate of lung cancer | ||||
| ASIR 2002 - 2013 | ||||
| ASIR in 2013 [per 100,000] (95% CI) | 16.4 (16.0 to 16.9) | 34.1 (33.5 to 34.7) | 12.0 (11.9 to 12.1) | 14.9 (14.8 to 15.0) |
| Patients with primary LC (with at least 6 months of survival)1 | ||||
| n (% of Total) | 43,175 (31.8%) | 92,397 (68.2%) | 133,401 (50.6%) | 130,421 (49.4%) |
| Age at diagnosis of LC | ||||
| 30 - 49 | 4,065 (9.4%) | 5,373 (5.8%) | 8,653 (6.5%) | 7,496 (5.7%) |
| 50 - 59 | 9,990 (23.1%) | 17,717 (19.2%) | 23,306 (17.5%) | 24,390 (18.7%) |
| 60 - 69 | 13,996 (32.4%) | 33,674 (36.4%) | 39,836 (29.9%) | 42,210 (32.4%) |
| 70 - 79 | 11,380 (26.4%) | 28,998 (31.4%) | 40,486 (30.3%) | 39,071 (30.0%) |
| 80+ | 3,744 (8.7%) | 6,635 (7.2%) | 21,120 (15.8%) | 17,254 (13.2%) |
| Median age [years] | 65.6 | 67.2 | 68.5 | 68.5 |
| Year of diagnosis of LC | ||||
| 2002 - 2005 | 11,931 (27.6%) | 30,235 (32.7%) | 43,382 (32.5%) | 44,697 (34.3%) |
| 2006 - 2009 | 14,712 (34.1%) | 31,499 (34.1%) | 44,922 (33.7%) | 43,957 (33.7%) |
| 2010 - 2013 | 16,532 (38.3%) | 30,663 (33.2%) | 45,097 (33.8%) | 41,767 (32.0%) |
| Histology of LC | ||||
| Squamous cell carcinoma (SCC) | 6,886 (15.9%) | 31,858 (34.5%) | 21,280 (16.0%) | 33,214 (25.5%) |
| Adenocarcinoma (AC) | 19,327 (44.8%) | 28,626 (31.0%) | 59,518 (44.6%) | 47,065 (36.1%) |
| Small cell carcinoma (SCLC) | 8,530 (19.8%) | 15,795 (17.1%) | 17,307 (13.0%) | 15,717 (12.1%) |
| Large cell carcinoma (LCC) | 2,616 (6.1%) | 5,652 (6.1%) | 7,343 (5.5%) | 7,667 (5.9%) |
| Other & unspecified (O&U) | 5,816 (13.5%) | 10,466 (11.3%) | 27,953 (21.0%) | 26,758 (20.5%) |
| Person-years at risk | ||||
| Mean follow-up [months] | 32.7 | 29.9 | 34.3 | 29.9 |
| Sum of PYAR | 117496 | 230346 | 380943 | 324648 |
| Patient status | ||||
| SPLC developed | 154 (0.4%) | 388 (0.4%) | 3,775 (2.8%) | 3,102 (2.4%) |
| other SPC developed | 1,337 (3.1%) | 3,416 (3.7%) | 4,188 (3.1%) | 4,836 (3.7%) |
| dead after LC | 29,300 (67.9%) | 68,582 (74.2%) | 90,026 (67.5%) | 96,168 (73.7%) |
| no event until end of follow-up | 12,384 (28.7%) | 20,011 (21.7%) | 35,412 (26.5%) | 26,315 (20.2%) |
| Absolute incidence rate of SPC | ||||
| SPLC IR [per 100,000 PYAR] (95% CI) | 131.1 (111.2 to 153.5) | 168.4 (152.1 to 186.1) | 991.0 (959.6 to 1023.1) | 955.5 (922.2 to 989.7) |
| Other SPC IR [per 100,000 PYAR] (95% CI) | 1137.9 (1077.7 to 1200.6) | 1483.0 (1433.7 to 1533.6) | 1099.4 (1066.3 to 1133.2) | 1489.6 (1447.9 to 1532.2) |
| ASIR age-standardized incidence rate based on the World Standard Population 1960; DCO death-certificate only; IR incidence rate; LC primary lung cancer; PYAR person-years at risk; SPC second primary cancer; SPLC second primary lung cancer | ||||
| 1 after exclusion by age and unusual histology | ||||
plt_w <- 25
tab2_gt <- tab2 %>%
#remove wrong male/female combinations
mutate(sir_3.38 = case_when(sex == "Female" ~ NA,
.default = sir_3.38),
target_3.38 = case_when(sex == "Female" ~ NA,
.default = target_3.38),
plot_sir_3.38 = case_when(sex == "Female" ~ NA,
.default = plot_sir_3.38),
sir_4.85 = case_when(sex == "Male" ~ NA,
.default = sir_4.85),
target_4.85 = case_when(sex == "Male" ~ NA,
.default = target_4.85),
plot_sir_4.85 = case_when(sex == "Male" ~ NA,
.default = plot_sir_4.85)) %>%
gt() %>%
#Column labelling
gt::cols_label(
sex ~ md(""),
starts_with("sir_") ~ md("SIR<sub>simIARC</sub>"),
starts_with("plot_") ~ md(""),
starts_with("target_") ~ md("SIR<sub>real</sub>")
)%>%
cols_hide(c(fu_time, reg, expected, pyar, n_base, sir_0.5,
starts_with("sir_lci"), starts_with("sir_uci"))) %>%
fmt_number(columns = starts_with("num_"),
decimals = 2) %>%
fmt_number(columns = starts_with("target_"),
decimals = 2) %>%
#replace NA with nothing
sub_missing(missing_text = "") %>%
#make bullet plots
gt_plt_bullet_mod(column = plot_sir_1, target = target_1, width = plt_w,
palette = c("darkgrey", "black"), background = "lightgrey") %>%
gt_plt_bullet_mod(column = plot_sir_2, target = target_2, width = plt_w,
palette = c("darkgrey", "black"), background = "lightgrey") %>%
gt_plt_bullet_mod(column = plot_sir_3.38, target = target_3.38, width = plt_w,
palette = c(colors_2_sex["Male"], colors_2_sex["Male"]), background = "lightgrey") %>%
gt_plt_bullet_mod(column = plot_sir_4.85, target = target_4.85, width = plt_w,
palette = c(colors_2_sex["Female"], colors_2_sex["Female"]), background = "lightgrey") %>%
#make col groups (spanner)
tab_spanner(
label = md("SIR<sub>real</sub> = 1.0<br>(No effect)"),
columns = c(sir_1, plot_sir_1, target_1)
) %>%
tab_spanner(
label = md("SIR<sub>real</sub> = 2.0<br>(Double the risk)"),
columns = c(sir_2, plot_sir_2, target_2)
) %>%
tab_spanner(
label = md("SIR<sub>real</sub> = 3.38<br>(SEER males)"),
columns = c(sir_3.38, plot_sir_3.38, target_3.38)
) %>%
tab_spanner(
label = md("SIR<sub>real</sub> = 4.85<br>(SEER females)"),
columns = c(sir_4.85, plot_sir_4.85, target_4.85)
) %>%
#formatting
tab_style(
style = list(
cell_text(weight = "bold")
),
locations = cells_body(
columns = starts_with("sir_")
)
) %>%
#make header
gt::tab_header(
title = md("Table 2: Estimating the risk for SPLC under IARC/IACR multiple primary rules <br> SIR<sub>simIARC</sub> given an assumed true risk SIR<sub>real</sub>")
) %>%
#footnotes
gt::tab_footnote(
footnote = "SIR for SPLC after LC for Males in SEER according to Thakur et al. 2018",
locations = gt::cells_column_spanners(contains("3.38"))
) %>%
gt::tab_footnote(
footnote = "SIR for SPLC after LC for Females in SEER according to Thakur et al. 2018",
locations = gt::cells_column_spanners(contains("4.85"))
) %>%
tab_source_note(
source_note = md(paste0("SIR<sub>simIARC</sub> simulated SIR under IARC/IACR multiple primary rules, not allowing same-histology SPLC; ", if(en_gb){"SIR standardised incidence ratio; "}else{"SIR standardized incidence ratio; "}, "PYAR person-years at risk; ", "SPC second primary cancer; ", "SPLC second primary lung cancer"))
) %>%
#special formatting
#global table options
# gt::opt_row_striping() %>% #add alternating stripes
cols_align(
align = "left",
columns = starts_with("target_")
) %>%
tab_style(
style = "padding-left:12px;padding-right:12px;",
locations = cells_column_spanners()
) %>%
tab_style(
style = "padding-right:12px;",
locations = list(
cells_body(columns = starts_with("target_")),
cells_column_labels(columns = starts_with("target_"))
)
) %>%
tab_style(
style = "padding-left:12px;",
locations = list(
cells_body(columns = starts_with("sir_")),
cells_column_labels(columns = starts_with("sir_"))
)
) %>%
tab_style(
style = cell_text(size = "small"),
locations = list(
cells_column_labels(columns = starts_with("sir_")),
cells_column_labels(columns = starts_with("target_"))
)
) %>%
##column width
gt::cols_width(
sex ~ px(80),
starts_with("sir_") ~ px(50),
#starts_with("plot_") ~ px(100),
starts_with("target_") ~ px(40)
)
#save table
tab2_gt %>%
gt::gtsave(
file.path(output_dir_tables, "tab2.png"),
vwidth = 1000, expand = 10
)
tab2_gt %>%
gt::tab_header(title = NULL, subtitle = NULL) %>%
gt::tab_source_note(source_note = NULL) %>% #not working in gt v0.3
gt::tab_options(#table.width = 900,
footnotes.marks = "letters") %>%
gt::gtsave(
file.path(output_dir_tables, "tab2.docx")
)
#print table
tab2_gt| Table 2: Estimating the risk for SPLC under IARC/IACR multiple primary rules SIRsimIARC given an assumed true risk SIRreal |
||||||||||||
|
SIRreal = 1.0 (No effect) |
SIRreal = 2.0 (Double the risk) |
SIRreal = 3.38 (SEER males)1 |
SIRreal = 4.85 (SEER females)2 |
|||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SIRsimIARC | SIRreal | SIRsimIARC | SIRreal | SIRsimIARC | SIRreal | SIRsimIARC | SIRreal | |||||
| Female | 0.71 | 1.00 | 1.41 | 2.00 | 3.42 | 4.85 | ||||||
| Male | 0.73 | 1.00 | 1.45 | 2.00 | 2.46 | 3.38 | ||||||
| SIRsimIARC simulated SIR under IARC/IACR multiple primary rules, not allowing same-histology SPLC; SIR standardized incidence ratio; PYAR person-years at risk; SPC second primary cancer; SPLC second primary lung cancer | ||||||||||||
| 1 SIR for SPLC after LC for Males in SEER according to Thakur et al. 2018 | ||||||||||||
| 2 SIR for SPLC after LC for Females in SEER according to Thakur et al. 2018 | ||||||||||||
#first overall and LC hist subtype results
tab3_pt1 <- res_sum_sir %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "t_sublungiarcgroup.1",
break_value = t_sublungiarcgroup.1) %>%
select(-t_sublungiarcgroup.1) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")
#second by age_group results
tab3_pt2 <- res_sum_sir_byage %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "p_agefcgroup",
break_value = p_agefcgroup) %>%
select(-t_sublungiarcgroup.1, -p_agefcgroup, -pyar) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")
#third by year_group results
tab3_pt3 <- res_sum_sir_byyear %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "p_yearfcgroup",
break_value = p_yearfcgroup) %>%
select(-t_sublungiarcgroup.1, -p_yearfcgroup, -pyar) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")tab3 <- tab3_pt1 %>%
bind_rows(tab3_pt2) %>%
bind_rows(tab3_pt3) %>%
mutate(zfkd.plot = zfkd.sir1_raw.sir,
seer.plot = seer.sir1_raw.sir)tab3_title <- md("Table 3: Validation analysis – Risk for SPLC using unadjusted and histology-specific SIR method")
tab3_subtitle <- "Comparing results for Germany (IARC/IACR MP rules) and United States (Verification dataset - SEER MP rules)"
tab3_source_note <- md(paste0(
"Notes: ",
"O<sub>SIR1</sub> number of cases observed in the data for SIR1<sub>raw</sub>; ",
"O<sub>SIR2</sub> number of cases observed in the data for SIR2<sub>sub</sub>, ZfKD data O<sub>SIR1</sub> = O<sub>SIR2</sub>; ",
"SEER Surveillance, Epidemiology, and End Results Program; ",
if(en_gb){"SIR standardised incidence ratio; "}else{"SIR standardized incidence ratio; "},
"SIR1<sub>raw</sub> unadjusted SIR using age-, sex-, region-, period-specific reference rates; ",
"SIR2<sub>sub</sub> histological subtype-specific SIR using age-, sex-, region-, period- and histology-specific reference rates and excluding same-histology group SPLC from observed and expected; ",
"SIR3<sub>IARC</sub> unadjusted SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR1<sub>raw</sub> = SIR3<sub>IARC</sub>; ",
"SIR4<sub>subIARC</sub> histological subtype-specific SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR2<sub>sub</sub> = SIR4<sub>subIARC</sub>; ",
"SPLC second primary lung cancer; ",
"x censored counts of observed smaller than 5 for data privacy reasons; ",
"ZfKD German Centre for Cancer Registry Data"))
tab3_gt <- tab3 %>%
gt() %>%
cols_hide(c(any_of(c("t_site", "fu_time", "fu_time_sort",
"sex", "break_var")),
ends_with(c("uci", "expected")),
ends_with(c("sir3_iarc.observed", "sir4_subiarc.observed")),
ends_with(c("sir1_raw.sir_lci", "sir3_iarc.sir_lci", "sir4_subiarc.sir_lci")),
contains(c("zfkd.sir3", "zfkd.sir4", "zfkd.sir1_raw.observed")),
)) %>%
#make header
gt::tab_header(
title = tab3_title,
subtitle = tab3_subtitle) %>%
#rename columns
gt::cols_label(
contains("break_var") ~ "",
contains("break_value") ~ "",
contains("plot") ~ "",
ends_with(".sir_lci") ~ md("95% CI<sub>SIR2</sub>"),
ends_with(".expected") ~ "E",
ends_with("sir1_raw.observed") ~ md("O<sub>SIR1</sub>"),
ends_with("sir2_sub.observed") ~ md("O<sub>SIR2</sub>"),
ends_with(".sir1_raw.sir") ~ md("SIR1<sub>raw</sub>"),
ends_with(".sir2_sub.sir") ~ md("**SIR2<sub>sub</sub>**"),
ends_with(".sir3_iarc.sir") ~ md("SIR3<sub>IARC</sub>"),
ends_with(".sir4_subiarc.sir") ~ md("SIR4<sub>subIARC</sub>")
) %>%
#make col groups (spanner)
tab_spanner(
label = md("**Germany**<br>(Analysis dataset - IARC/IACR MP rules)"),
columns = c(zfkd.sir1_raw.sir,
zfkd.sir2_sub.sir,
zfkd.sir2_sub.sir_lci,
zfkd.sir2_sub.observed,
zfkd.plot),
id = "german_spanner"
) %>%
tab_spanner(
label = md("**United States**<br>(Validation dataset - SEER MP rules)"),
columns = c(seer.sir1_raw.sir,
seer.sir2_sub.sir,
seer.sir2_sub.sir_lci,
seer.sir3_iarc.sir,
seer.sir4_subiarc.sir,
seer.sir1_raw.observed,
seer.sir2_sub.observed,
seer.plot),
id = "us_spanner"
) %>%
gt::rows_add(sex = "female_header", .before = 1) %>%
gt::rows_add(sex = "male_header", .before = 1) %>%
#make row groups
gt::tab_row_group(
label = "",
rows = (sex == "female_header"),
id = "female"
) %>%
gt::tab_row_group(
label = md("**Females**"),
rows = (break_value == "Total - All lung cancers" & sex == "Female"),
id = "female_tot"
) %>%
gt::tab_row_group(
label = "Histology of LC",
rows = (break_var == "t_sublungiarcgroup.1" & break_value != "Total - All lung cancers" & sex == "Female"),
id = "female_sub"
) %>%
gt::tab_row_group(
label = "Age at diagnosis of LC",
rows = (break_var == "p_agefcgroup" & sex == "Female"),
id = "female_age"
) %>%
gt::tab_row_group(
label = "Year of diagnosis of LC",
rows = (break_var == "p_yearfcgroup" & sex == "Female"),
id = "female_year"
) %>%
#make row groups
gt::tab_row_group(
label = "",
rows = (sex == "male_header"),
id = "male"
) %>%
gt::tab_row_group(
label = md("**Males**"),
rows = (break_value == "Total - All lung cancers" & sex == "Male"),
id = "male_tot"
) %>%
gt::tab_row_group(
label = "Histology of LC",
rows = (break_var == "t_sublungiarcgroup.1" & break_value != "Total - All lung cancers" & sex == "Male"),
id = "male_sub"
) %>%
gt::tab_row_group(
label = "Age at diagnosis of LC",
rows = (break_var == "p_agefcgroup" & sex == "Male"),
id = "male_age"
) %>%
gt::tab_row_group(
label = "Year of diagnosis of LC",
rows = (break_var == "p_yearfcgroup" & sex == "Male"),
id = "male_year"
) %>%
row_group_order(groups = c("female", "female_tot", "female_sub", "female_age", "female_year",
"male", "male_tot", "male_sub", "male_age", "male_year")) %>%
#column formatting
gt::fmt_number(
columns = contains(c("pyar", "observed", "n_base")),
decimals = 0
) %>%
gt::fmt_number(
columns = contains(c("expected")),
decimals = 1
) %>%
gt::fmt_number(
columns = ends_with(c(".sir", ".sir_lci", ".sir_uci")),
decimals = 2
) %>%
gt::sub_missing(
columns = everything(),
missing_text = ""
) %>%
#censor small values
sub_small_vals(
columns = zfkd.sir2_sub.observed,
rows = everything(),
threshold = 5,
small_pattern = "x") %>%
cols_merge_range(
col_begin = zfkd.sir2_sub.sir_lci,
col_end = zfkd.sir2_sub.sir_uci
) %>%
cols_merge_range(
col_begin = seer.sir2_sub.sir_lci,
col_end = seer.sir2_sub.sir_uci,
) %>%
#plotted columns
plot_gt_sircomp_dotplot(var1 = zfkd.plot, var2 = zfkd.sir2_sub.sir, var3 = seer.sir2_sub.sir,
col1 = colors_4_method[1], col2 = colors_4_method[2], col3 = colors_4_method[3],
label_x1 = x1, label_x2 = x2, label_x3 = "US",
x_min = 0.5, x_max = 10, width = 70) %>%
plot_gt_sircomp_dotplot(var1 = seer.plot, var2 = seer.sir2_sub.sir, var3 = zfkd.sir2_sub.sir,
col1 = colors_4_method[1], col2 = colors_4_method[2], col3 = colors_4_method[3],
label_x1 = x1, label_x2 = x2, label_x3 = "GER",
x_min = 0.5, x_max = 10, width = 70) %>%
tab_source_note(
source_note = tab3_source_note
) %>%
#special formatting
##make column and row group labels bold
gt::tab_style(
style = cell_text(weight = "bold"),
locations = list(
cells_body(columns = c(zfkd.sir2_sub.sir, seer.sir2_sub.sir))
)
) %>%
gt:: cols_width(
break_value ~ px(240),
zfkd.sir1_raw.sir ~ px(65),
zfkd.sir2_sub.sir ~ px(65),
zfkd.sir2_sub.sir_lci ~ px(87),
zfkd.sir2_sub.observed ~ px(42),
zfkd.plot ~ px(250),
seer.sir1_raw.sir ~ px(65),
seer.sir2_sub.sir ~ px(65),
seer.sir2_sub.sir_lci ~ px(95),
seer.sir3_iarc.sir ~ px(65),
seer.sir4_subiarc.sir ~ px(85),
seer.sir1_raw.observed ~ px(50),
seer.sir2_sub.observed ~ px(50),
seer.plot ~ px(250)
) %>%
#global table options
gt::opt_row_striping() %>% #add alternating stripes
gt::tab_options(data_row.padding = px(3), # reduce row height
row_group.padding = px(8), # reduce row height
stub.border.width = px(20), # increase space between column stubs
row.striping.include_stub = TRUE)
#output table
tab3_gtWarning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
| Table 3: Validation analysis – Risk for SPLC using unadjusted and histology-specific SIR method | |||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Comparing results for Germany (IARC/IACR MP rules) and United States (Verification dataset - SEER MP rules) | |||||||||||||
| Germany (Analysis dataset - IARC/IACR MP rules) |
United States (Validation dataset - SEER MP rules) |
||||||||||||
| SIR1raw | SIR2sub | 95% CISIR2 | OSIR2 | SIR1raw | SIR2sub | 95% CISIR2 | SIR3IARC | SIR4subIARC | OSIR1 | OSIR2 | |||
| Females | |||||||||||||
| Total - All lung cancers | 2.14 | 2.98 | 2.53–3.49 | 154 | 5.52 | 4.37 | 4.18–4.56 | 2.52 | 3.58 | 3,775 | 2,106 | ||
| Histology of LC | |||||||||||||
| Adenocarcinoma (AC) | 1.69 | 2.53 | 1.91–3.28 | 57 | 6.08 | 4.48 | 4.20–4.76 | 2.61 | 3.96 | 2,055 | 996 | ||
| Large cell carcinoma (LCC) | 0.24 | 0.29 | 0.01–1.60 | x | 4.04 | 3.95 | 3.27–4.73 | 0.32 | 0.36 | 139 | 119 | ||
| Other & unspecified (O&U) | 1.28 | 1.80 | 0.98–3.01 | 14 | 4.04 | 3.88 | 3.49–4.30 | 2.07 | 3.00 | 551 | 364 | ||
| Small cell carcinoma (SCLC) | 2.43 | 3.57 | 2.26–5.35 | 23 | 4.26 | 4.51 | 3.79–5.32 | 2.52 | 3.96 | 206 | 139 | ||
| Squamous cell carcinoma (SCC) | 4.35 | 5.17 | 3.94–6.67 | 59 | 6.50 | 4.66 | 4.26–5.09 | 3.37 | 4.08 | 824 | 488 | ||
| Age at diagnosis of LC | |||||||||||||
| 30 - 49 | 4.20 | 6.39 | 2.57–13.17 | 7 | 37.95 | 32.88 | 26.97–39.70 | 15.58 | 24.76 | 198 | 108 | ||
| 50 - 59 | 3.56 | 5.22 | 3.79–7.00 | 44 | 14.29 | 12.06 | 10.85–13.37 | 6.65 | 10.18 | 656 | 362 | ||
| 60 - 69 | 2.37 | 3.35 | 2.59–4.26 | 66 | 7.23 | 5.94 | 5.54–6.36 | 3.36 | 4.91 | 1,456 | 819 | ||
| 70 - 79 | 1.34 | 1.81 | 1.24–2.55 | 32 | 3.90 | 3.06 | 2.83–3.30 | 1.75 | 2.46 | 1,224 | 681 | ||
| 80+ | 0.82 | 1.06 | 0.34–2.47 | 5 | 2.05 | 1.55 | 1.30–1.83 | 0.92 | 1.23 | 241 | 136 | ||
| Year of diagnosis of LC | |||||||||||||
| 2002 - 2005 | 2.27 | 3.07 | 2.31–3.99 | 55 | 5.71 | 4.44 | 4.15–4.74 | 2.62 | 3.66 | 1,620 | 900 | ||
| 2006 - 2009 | 1.91 | 2.66 | 1.99–3.48 | 53 | 5.61 | 4.48 | 4.18–4.80 | 2.60 | 3.70 | 1,452 | 817 | ||
| 2010 - 2013 | 2.31 | 3.33 | 2.44–4.44 | 46 | 5.00 | 4.02 | 3.63–4.44 | 2.17 | 3.15 | 703 | 389 | ||
| Males | |||||||||||||
| Total - All lung cancers | 0.85 | 1.15 | 1.03–1.27 | 388 | 3.77 | 2.94 | 2.81–3.08 | 1.71 | 2.34 | 3,102 | 1,773 | ||
| Histology of LC | |||||||||||||
| Adenocarcinoma (AC) | 0.93 | 1.22 | 1.02–1.45 | 132 | 4.19 | 3.13 | 2.91–3.37 | 1.98 | 2.81 | 1,344 | 707 | ||
| Large cell carcinoma (LCC) | 0.04 | 0.04 | 0.00–0.24 | x | 2.79 | 2.92 | 2.42–3.48 | 0.25 | 0.28 | 135 | 123 | ||
| Other & unspecified (O&U) | 0.80 | 1.04 | 0.74–1.41 | 40 | 2.68 | 2.58 | 2.29–2.89 | 1.38 | 1.91 | 415 | 289 | ||
| Small cell carcinoma (SCLC) | 1.03 | 1.36 | 0.99–1.81 | 46 | 3.09 | 3.38 | 2.80–4.04 | 2.13 | 3.12 | 160 | 120 | ||
| Squamous cell carcinoma (SCC) | 0.89 | 1.25 | 1.07–1.46 | 169 | 4.22 | 2.86 | 2.62–3.11 | 1.77 | 2.35 | 1,048 | 534 | ||
| Age at diagnosis of LC | |||||||||||||
| 30 - 49 | 1.64 | 2.24 | 0.73–5.22 | 5 | 26.40 | 22.55 | 17.33–28.86 | 13.37 | 19.21 | 106 | 63 | ||
| 50 - 59 | 1.61 | 2.20 | 1.70–2.81 | 65 | 10.10 | 8.01 | 7.14–8.95 | 4.35 | 6.12 | 547 | 308 | ||
| 60 - 69 | 1.16 | 1.57 | 1.37–1.80 | 207 | 4.79 | 3.74 | 3.46–4.03 | 2.17 | 3.01 | 1,199 | 673 | ||
| 70 - 79 | 0.53 | 0.71 | 0.58–0.86 | 104 | 2.73 | 2.16 | 1.99–2.35 | 1.28 | 1.75 | 1,028 | 596 | ||
| 80+ | 0.20 | 0.25 | 0.10–0.51 | 7 | 1.60 | 1.26 | 1.06–1.49 | 0.69 | 0.91 | 222 | 133 | ||
| Year of diagnosis of LC | |||||||||||||
| 2002 - 2005 | 0.79 | 1.05 | 0.89–1.22 | 156 | 3.71 | 2.92 | 2.72–3.13 | 1.72 | 2.32 | 1,352 | 785 | ||
| 2006 - 2009 | 0.94 | 1.26 | 1.07–1.48 | 157 | 4.07 | 3.15 | 2.92–3.40 | 1.82 | 2.49 | 1,239 | 702 | ||
| 2010 - 2013 | 0.84 | 1.15 | 0.90–1.44 | 75 | 3.30 | 2.59 | 2.30–2.91 | 1.50 | 2.10 | 511 | 286 | ||
| Notes: OSIR1 number of cases observed in the data for SIR1raw; OSIR2 number of cases observed in the data for SIR2sub, ZfKD data OSIR1 = OSIR2; SEER Surveillance, Epidemiology, and End Results Program; SIR standardized incidence ratio; SIR1raw unadjusted SIR using age-, sex-, region-, period-specific reference rates; SIR2sub histological subtype-specific SIR using age-, sex-, region-, period- and histology-specific reference rates and excluding same-histology group SPLC from observed and expected; SIR3IARC unadjusted SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR1raw = SIR3IARC; SIR4subIARC histological subtype-specific SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR2sub = SIR4subIARC; SPLC second primary lung cancer; x censored counts of observed smaller than 5 for data privacy reasons; ZfKD German Centre for Cancer Registry Data | |||||||||||||
#save table
tab3_gt %>%
gt::gtsave(
file.path(output_dir_tables, "tab3.png"),
vwidth = 1600, expand = 30
)Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
tab3_gt %>%
gt::gtsave(
file.path(output_dir_tables, "tab3.docx")
)Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
fig2_title <- "Sex-specific relative risk for SPLC in German lung cancer survivors"
fig2_subtitle <- rlang::englue("Estimation of risk for SPLC after LC using general reference rates (<span style='color:{colors_2_method[1]}'>**SIR1<sub>raw</sub>**</span>) and subtype-specific reference rates excluding same-histology group (<span style='color:{colors_2_method[2]}'>**SIR2<sub>sub</sub>**</span>)")
fig2_caption <- paste0(if(en_gb){"SIR Standardised incidence ratio; "}else{"SIR Standardized incidence ratio; "}, "length of error bar indicates 95% CI; O observed cases; E expected cases")
fig2 <- res_sum_sir %>%
filter(fu_time_sort == 999 & str_detect(t_site, "excluding|Lung and Bronchus") & method %in% c("sir1_raw", "sir2_sub")) %>%
filter(registry == "zfkd") %>%
mutate(group = paste(sex, registry)) %>%
ggblanket::gg_pointrange(
x = sex,
y = sir,
col = method,
ymin = sir_lci,
ymax = sir_uci,
position = ggplot2::position_dodge(width = 0.8),
size = .6,
linewidth = .8,
pal = colors_2_method,
x_title = "",
y_title = "SIR",
y_trans = "log10",
y_limits = c(0.3, 10),
y_breaks = c(0.3, 1, 3, 10),
y_labels = c(0.3, 1, 3, 10),
y_oob = scales::oob_squish,
col_labels = c("**SIR1<sub>raw</sub>**", "**SIR2<sub>sub</sub>**"),
col_title = "",
col_legend_place = "bottom",
facet = t_sublungiarcgroup.1
) +
# ggrepel::geom_text_repel(aes(label = sir)) + #The ggrepel package can be used to neatly avoid overlapping labels.
geom_text(aes(
label = sprintf("%2.2f", sir)),
# position = ggplot2::position_dodge(width = 1.5)
) +
#add O/E for SIR1 in grey
geom_text(aes(
y = .5,
label = ifelse(method == "sir1_raw", paste0("O:", observed, "/E:", sprintf("%.1f", expected)), NA),
color = "darkgrey"),
size = 2.5
) +
#add O/E for SIR2 in color
geom_label(aes(
y = .4,
label = ifelse(method == "sir2_sub", paste0("O:", observed, "/E:", sprintf("%.1f", expected)), NA),
fill = method),
colour = "white",
size = 2.5,
label.padding = unit(0.1, "lines"),
) +
#add line of null effect
geom_hline(yintercept = 1,
linewidth = .2) +
#labeling
labs(title = fig2_title,
subtitle = fig2_subtitle,
caption = fig2_caption) +
theme(
legend.position = "bottom",
legend.text = ggtext::element_markdown(),
plot.title = ggtext::element_markdown(),
plot.subtitle = ggtext::element_markdown(size = 9),
)
#print figure
fig2Warning: Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Warning: Removed 12 rows containing missing values (`geom_text()`).
Warning: Removed 12 rows containing missing values (`geom_label()`).
#save figure
fig2 %>%
ggsave(filename = file.path(output_dir_tables, "fig2.png"),
width = 9, height = 6)Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Warning: Removed 12 rows containing missing values (`geom_text()`).
Warning: Removed 12 rows containing missing values (`geom_label()`).
fig2 %>%
ggsave(filename = file.path(output_dir_tables, "fig2.tiff"),
width = 9, height = 6, units = "in")Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Warning: Removed 12 rows containing missing values (`geom_text()`).
Warning: Removed 12 rows containing missing values (`geom_label()`).
res_hist_freq <- d1_lung_wide %>%
group_by(reg.1, p_sex.1) %>%
count(as.character(t_histgroupiarc.1)) %>%
mutate(freq = n / sum(n),
group = "Histology of LC") %>%
rename(t_histgroupiarc = "as.character(t_histgroupiarc.1)") %>%
arrange(t_histgroupiarc) %>%
mutate(pos = cumsum(freq) - (0.5 * freq),
perc = sprintf("%.0f%%", 100*freq)) %>%
bind_rows({d1_lung_wide %>%
filter(t_lung.2 == 1) %>%
group_by(reg.1, p_sex.1) %>%
count(as.character(t_histgroupiarc.1)) %>%
mutate(freq = n / sum(n),
group = "Histology of SPLC") %>%
rename(t_histgroupiarc = "as.character(t_histgroupiarc.1)") %>%
arrange(t_histgroupiarc) %>%
mutate(pos = cumsum(freq) - (0.5 * freq),
perc = sprintf("%.0f%%", 100*freq))}) %>%
mutate(reg.1 = factor(reg.1, levels=c('zfkd','seer')))supp_fig_hist <- res_hist_freq %>%
# one way to order values on axis
ggblanket::gg_col(
x = freq,
y = group,
col = t_histgroupiarc,
facet = p_sex.1,
facet2 = reg.1,
col_title = "",
position = "stack",
pal = cols4all::c4a("brewer.BrBG", 7),
x_title = "",
y_title = "",
title = "Histological groups of LC and SPLC",
subtitle = "Groups of malignant neoplasms considered to be histologically ‘different’ for the purpose of defining multiple tumors (IARC/IACR definition, ICD-O-3 first revision)") +
geom_text(aes(x = pos, y = group, label = perc),
size = 3, color = "black")
#print figure
supp_fig_hist#save figure
supp_fig_hist %>%
ggsave(filename = file.path(output_dir_tables, "supp_fig_hist.png"),
width = 12, height = 6)
supp_fig_hist %>%
ggsave(filename = file.path(output_dir_tables, "supp_fig_hist.tiff"),
width = 12, height = 6, units = "in")supp_tab_def <- tibble::tibble(
domain = c("**Timing**",
"**Location**",
"**Laterality** <br>(different side in same location)",
"**Histology** <br>(for same location and laterality)",
"**Behavior** <br>(for same location, laterality and histology)"),
iarc_rules = c("irrelevant",
"- different organ: ✅ SPC
- same organ: ❌ no SPC",
"❌ no SPC",
"
- different histological group (wide groups): ✅ SPC
- unknown or unspecified histology: ❌ no SPC",
"❌ no SPC"),
seer_rules = c("- after 1-5 “disease-free” years [3 years for lung cancer] the exact same cancer (location, behavior, histology, laterality) will be recorded as ✅ SPC
- after 60 days the same cancer (location, histology, laterality) with different behavior will be recorded as ✅ SPC",
"different location (mostly at third topography character, i.e. C33 is different from C34): ✅ SPC",
"different side: ✅ SPC",
"
- histology differs in third digit xx<ins>x</ins>x (narrow groups): ✅ SPC
- carcinoma/sarcoma NOS follows specified carcinoma/sarcoma or vice versa: ❌ no SPC",
"more than 60 days in between cancers: ✅ SPC"),
expected_splc = c("higher SPLC incidence for SEER MP rules",
"higher SPLC incidence for SEER MP rules",
"higher SPLC incidence for SEER MP rules",
"higher SPLC incidence for SEER MP rules",
"not relevant, because study only takes into account malignant behavior")
)supp_tab_def_gt <- supp_tab_def %>%
#Start making gt table
gt::gt() %>%
#make header
gt::tab_header(
title = "Supplement Table S1: Comparison of IARC/IACR and SEER multiple primary rules",
subtitle = "") %>%
#rename columns
gt::cols_label(
"domain" = "",
"iarc_rules" = "IARC/IACR MP rules",
"seer_rules" = "SEER MP rules",
"expected_splc" = md("Expected <br>incidence of SPLC")
) %>%
#special formatting
gt::fmt_markdown(
columns = everything()
) %>%
##top align all cells
gt::tab_style(
style = cell_text(v_align = "top"),
locations = cells_body(
columns = everything())
)%>%
##make column labels bold
gt::tab_style(
style = cell_text(weight = "bold"),
locations = list(
cells_column_labels(everything())
)
) %>%
##change font of Code column
tab_style(
style = cell_text(font = c("Consolas", default_fonts())),
locations = cells_body(
columns = c(iarc_rules,
seer_rules))
) %>%
## add footnotes
tab_source_note(
source_note = md(
paste0("IARC/IACR multiple primary rules according to [", ref_iarc_def, "] IARC Working Group Report. International rules for multiple primary cancers (ICD-O third edition). European Journal of Cancer Prevention 2005;14:307–8."
))
) %>%
tab_source_note(
source_note = md(
paste0("SEER multiple primary rules according to [", ref_seer_def, "] Johnson C, Peace S, Adamo P, Fritz A, Percy-Laurry A, Edwards BK. The 2007 Multiple Primary and Histology Coding Rules. Bethesda, MD: National Cancer Institute, SEER; 2007."
))
) %>%
##column width
gt::cols_width(
domain ~ px(150),
iarc_rules ~ px(250),
seer_rules ~ px(400),
expected_splc ~ px(180)
)
#save table
supp_tab_def_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_def.png"),
vwidth = 1000, expand = 20
)
supp_tab_def_gt| Supplement Table S1: Comparison of IARC/IACR and SEER multiple primary rules | |||
|---|---|---|---|
| IARC/IACR MP rules | SEER MP rules | Expected incidence of SPLC |
|
Timing |
irrelevant |
|
higher SPLC incidence for SEER MP rules |
Location |
|
different location (mostly at third topography character, i.e. C33 is different from C34): ✅ SPC |
higher SPLC incidence for SEER MP rules |
Laterality |
❌ no SPC |
different side: ✅ SPC |
higher SPLC incidence for SEER MP rules |
Histology |
|
|
higher SPLC incidence for SEER MP rules |
Behavior |
❌ no SPC |
more than 60 days in between cancers: ✅ SPC |
not relevant, because study only takes into account malignant behavior |
| IARC/IACR multiple primary rules according to [10] IARC Working Group Report. International rules for multiple primary cancers (ICD-O third edition). European Journal of Cancer Prevention 2005;14:307–8. | |||
| SEER multiple primary rules according to [12] Johnson C, Peace S, Adamo P, Fritz A, Percy-Laurry A, Edwards BK. The 2007 Multiple Primary and Histology Coding Rules. Bethesda, MD: National Cancer Institute, SEER; 2007. | |||
#The data for this table is manually entered here
#tribble function creates row-wise data tibble
supp_tab_filter <- readxl::read_xlsx(filter_file)supp_tab_filter_gt <- supp_tab_filter %>%
#Start making gt table
gt::gt() %>%
#make header
gt::tab_header(
title = "Table S2: Details of dataset filtering",
subtitle = "") %>%
#hide columns
gt::cols_hide(reg.1) %>%
#rename columns
gt::cols_label(
"stage" = "Filtering Stage",
"excluded" = "N excluded",
"remain" = "N remaining",
"code" = "Code",
"comments" = "Comments") %>%
#Row grouping
tab_row_group(
label = md("**United States (Verification dataset - SEER)**"),
rows = reg.1 == "seer"
) %>%
gt::tab_row_group(
label = md("**Germany (Analysis dataset - ZfKD)**"),
rows = reg.1 == "zfkd"
) %>%
#special formatting
gt::fmt_markdown(
columns = everything()
) %>%
##format missing
gt::sub_missing(
columns = starts_with("comments"),
missing_text = "") %>%
##left align
gt::cols_align(
align = "left",
columns = everything()
) %>%
##top align all cells
gt::tab_style(
style = cell_text(v_align = "top"),
locations = cells_body(
columns = everything())
)%>%
##make column labels bold
gt::tab_style(
style = cell_text(weight = "bold"),
locations =
cells_column_labels(everything())
) %>%
##change font of Code column
tab_style(
style = cell_text(font = c("Consolas", default_fonts())),
locations = cells_body(
columns = c(code))
) %>%
tab_footnote(
footnote = "GEKID. Atlas der Krebsinzidenz und Krebsmortalität der Gesellschaft der epidemiologischen Krebsregister in Deutschland e.V. (GEKID) [Internet]. Lübeck: Gesellschaft der epidemiologischen Krebsregister in Deutschland e.V.; 2021 [cited 2023 Jun 30] p. 20. Available from: https://atlas.gekid.de/CurrentVersion/Methoden%20GEKID%20Atlas.pdf",
locations = cells_body(columns = comments,
rows = str_detect(stage, "2 ") & reg.1 == "zfkd"),
) %>%
##column width
gt::cols_width(
stage ~ px(200),
excluded ~ px(80),
remain ~ px(90),
code ~ px(500),
comments ~ px(600)
)
#save table
supp_tab_filter_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_filter.png"),
vwidth = 1550, expand = 20
)#The data for this table is manually entered here
#tribble function creates row-wise data tibble
supp_tab_dm <- readxl::read_xlsx(dm_file)supp_tab_dm_gt <- supp_tab_dm %>%
#Start making gt table
gt::gt() %>%
#make header
gt::tab_header(
title = "Table S3: Details of data modifications",
subtitle = "") %>%
#rename columns
gt::cols_label(
"variable" = "Variable",
"details" = "Detailed description",
"code" = "Code") %>%
#special formatting
gt::fmt_markdown(
columns = everything()
) %>%
##left align
gt::cols_align(
align = "left",
columns = everything()
) %>%
##top align all cells
gt::tab_style(
style = cell_text(v_align = "top"),
locations = cells_body(
columns = everything())
)%>%
##make column labels bold
gt::tab_style(
style = cell_text(weight = "bold"),
locations =
cells_column_labels(everything())
) %>%
##change font of Code column
tab_style(
style = cell_text(font = c("Consolas", default_fonts())),
locations = cells_body(
columns = c(code))
) %>%
##column width
gt:: cols_width(
c(variable) ~ px(200),
c(details) ~ px(500),
c(code) ~ px(500)
)
#save table
supp_tab_dm_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_dm.png"),
vwidth = 1240, expand = 20
)
supp_tab_dm_gt| Table S3: Details of data modifications | ||
|---|---|---|
| Variable | Detailed description | Code |
Patient Status at end of follow-up |
|
msSPChelpR::pat_status_tt( |
Follow-up time of patient in years |
|
msSPChelpR::calc_futime_tt( |
Type of diagnostic confirmation |
|
ZfKD data: |
Groups of malignant neoplasms histologically different |
|
histgroup_iarc(hist_var = t_hist, new_var_hist = t_histgroupiarc, version = |
Regional Registry | Cases LC | Cases SPC % | SPLC DCO%, Microscopic %,
#Cases LC
supp_tab_qual_1 <- d1_lung_wide %>%
count(p_region.1, name = "lc_n")
#Overall DCO & Microscopic rate for LC
supp_tab_qual_2 <- d0_lung_wide_raw %>%
summarize(
n = n(),
n_dco = sum(t_confirm.1 == "DCO", na.rm = TRUE),
n_micro = sum(t_confirm.1 %in% c("cytology", "histology"), na.rm = TRUE),
n_other = sum(t_confirm.1 %in% c("autopsy", "clinical (without diagnostics)", "clinical with diagnostics", "tumor markers"), na.rm = TRUE),
n_missing = sum((t_confirm.1 == "unknown" | is.na(t_confirm.1)), na.rm = TRUE),
.by = p_region.1) %>%
mutate(genlc_dco_perc = n_dco / n,
genlc_micro_perc = n_micro / n,
genlc_other_perc = n_other / n,
genlc_miss_perc = n_missing / n) %>%
select(p_region.1, genlc_dco_perc, genlc_micro_perc, genlc_other_perc, genlc_miss_perc)
#Cases and percentage SPLC
supp_tab_qual_3 <- d1_lung_wide %>%
summarize(lc_n = n(),
pyar_sum = sum(p_futimeyrs.1),
spc_n = sum(p_spc == "SPC developed"),
splc_n = sum(t_lung.2),
.by = p_region.1) %>%
mutate(spc_perc = spc_n / lc_n,
splc_perc = splc_n / lc_n) %>%
select(-lc_n)
#SIR for SPLC by region
supp_tab_qual_4 <- res_sum_sir_byreg %>%
pivot_wider(names_from = c(method, sex),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "pyar", "observed", "n_base")),
names_sep = "_") %>%
relocate()
#merge together
supp_tab_qual <- supp_tab_qual_1 %>%
left_join(supp_tab_qual_2, by = c("p_region.1")) %>%
left_join(supp_tab_qual_3, by = c("p_region.1")) %>%
left_join(supp_tab_qual_4, by = c("p_region.1"))
#test that "Case counts and PYR from data for SPLC match SIR results"
testthat::test_that("Case counts and PYR from data for SPLC match SIR results",
{
testthat::expect_equal(
supp_tab_qual %>% select(p_region.1, splc_n),
supp_tab_qual %>%
mutate(splc_n = observed_sir1_raw_Female + observed_sir1_raw_Male) %>%
select(p_region.1, splc_n)
)
testthat::expect_equal(
supp_tab_qual %>% select(p_region.1, lc_n),
supp_tab_qual %>%
mutate(lc_n = n_base_sir1_raw_Female + n_base_sir1_raw_Male) %>%
select(p_region.1, lc_n)
)
testthat::expect_equal(
supp_tab_qual %>% select(p_region.1, pyar_sum),
supp_tab_qual %>%
mutate(pyar_sum = pyar_sir1_raw_Female + pyar_sir1_raw_Male + lc_n*0.5) %>%
select(p_region.1, pyar_sum),
tolerance = 0.0001
)
}
)Test passed 😸
supp_tab_qual_gt <- supp_tab_qual %>%
#prepare column of list of cofirmation types
mutate(genlc_dco_perc2 = genlc_dco_perc,
genlc_micro_perc2 = genlc_micro_perc,
genlc_missother_perc2 = genlc_miss_perc + genlc_other_perc) %>%
nest(genlc_confirm = c(genlc_dco_perc2, genlc_micro_perc2, genlc_missother_perc2)) %>%
gt() %>%
#don't show first column and value, lci, uci
gt::cols_hide(
columns = c(starts_with("n_base"), starts_with("observed"), starts_with("pyar_sir"),
starts_with("sir_lci"), starts_with("sir_uci"), contains("sir3"),
contains("sir4"),
all_of(c("genlc_dco_perc", "genlc_micro_perc", "genlc_miss_perc", "genlc_other_perc")))
) %>%
#Column labelling
gt::cols_label(
p_region.1 = md("Regional Registry"),
lc_n = md("LC Cases"),
genlc_dco_perc = md("DCO"),
genlc_micro_perc = md("Microscopic"),
genlc_other_perc = md("Other"),
genlc_miss_perc = md("Missing"),
pyar_sum = md("PYAR"),
spc_n = md("Cases all SPC (%)"),
splc_n = md("Cases SPLC (%)"),
sir_sir1_raw_Female = md("SIR1<sub>raw</sub> Female"),
sir_sir2_sub_Female = md("SIR2<sub>sub</sub> Female"),
sir_sir1_raw_Male = md("SIR1<sub>raw</sub> Male"),
sir_sir2_sub_Male = md("SIR2<sub>sub</sub> Male")
)%>%
#gt: Define row groups -> careful: you need to add groups in reverse order... so bottom group first
gt::tab_row_group(
label = md("**United States - SEER**"),
rows = c(12:28)
) %>%
gt::tab_row_group(
label = md("**Germany - ZfKD**"),
rows = c(1:11)
) %>%
#column spanners
gt::tab_spanner(
label = "Type of diagnostic confirmation for all LC",
columns = starts_with("genlc_")
) %>%
gt::tab_spanner(
label = "Risk for SPLC - SIR (95% CI)",
columns = c(sir_sir1_raw_Female, sir_sir2_sub_Female,
sir_sir1_raw_Male, sir_sir2_sub_Male)
) %>%
#column formatting
gt::fmt_number(
columns = c(lc_n, pyar_sum),
decimals = 0
) %>%
# gt::fmt_percent(
# columns = starts_with("genlc_"),
# decimals = 1
# ) %>%
gt::fmt_percent(
columns = c(spc_perc, splc_perc),
decimals = 1
) %>%
gt::cols_merge_n_pct(
col_n = c(spc_n),
col_pct = c(spc_perc)
) %>%
gt::cols_merge_n_pct(
col_n = c(splc_n),
col_pct = c(splc_perc)
) %>%
#special format plots
gtExtras::gt_plt_bar_stack(
column = genlc_confirm,
fmt_fn = scales::label_number(accurary = 1, scale = 100, suffix = "%"),
palette = cols4all::c4a("hcl.yellow_purple", 3),
labels = c("DCO", "Microscopic", "Other")
) %>%
#special format SIR with CI
fmt(
columns = sir_sir1_raw_Female,
fns = function(x) {paste0(x," (", supp_tab_qual$sir_lci_sir1_raw_Female, "—",
supp_tab_qual$sir_uci_sir1_raw_Female, ")")}
)%>%
fmt(
columns = sir_sir1_raw_Male,
fns = function(x) {paste0(x," (", supp_tab_qual$sir_lci_sir1_raw_Male, "—",
supp_tab_qual$sir_uci_sir1_raw_Male, ")")}
)%>%
fmt(
columns = sir_sir2_sub_Female,
fns = function(x) {paste0(x," (", supp_tab_qual$sir_lci_sir2_sub_Female, "—",
supp_tab_qual$sir_uci_sir2_sub_Female, ")")}
)%>%
fmt(
columns = sir_sir2_sub_Male,
fns = function(x) {paste0(x," (", supp_tab_qual$sir_lci_sir2_sub_Male, "—",
supp_tab_qual$sir_uci_sir2_sub_Male, ")")}
)%>%
#make header
gt::tab_header(
title = paste0("Table S4: Data quality for included regions and SIR estimates"),
subtitle = paste0("")) %>%
#footnotes
#footnotes
gt::tab_footnote(
footnote = "Microscopic diagnoses include cytology, and histology of the tumor. Other diagnoses include autopsy, clinical (without diagnostics), clinical with diagnostics, tumor markers and missing information on source of diagnosis.",
locations = cells_column_labels(columns = genlc_confirm)
) %>%
# gt::tab_footnote(
# footnote = "Microscopic diagnoses include cytology, and histology of the tumor.",
# locations = cells_column_labels(columns = genlc_micro_perc)
# ) %>%
# #footnotes
# gt::tab_footnote(
# footnote = "Other diagnoses include autopsy, clinical (without diagnostics), clinical with diagnostics and tumor markers.",
# locations = cells_column_labels(columns = genlc_other_perc)
# ) %>%
tab_source_note(
source_note = paste0(if(en_gb){"SIR Standardised incidence ratio; "}else{"SIR Standardized incidence ratio; "}, "DCO death-certificate only; ", "LC primary lung cancer; ", "PYAR person-years at risk; ", "SPC second primary cancer; ", "SPLC second primary lung cancer")
) %>%
#special formatting
#global table options
gt::opt_row_striping() %>% #add alternating stripes
gt::tab_options(data_row.padding = px(2)) %>% # reduce row height
##column width
gt::cols_width(
p_region.1 ~ px(300),
starts_with("genlc_") ~ px(30),
starts_with("sir_sir1") ~ px(140),
starts_with("sir_sir2") ~ px(140)
)
#save table
supp_tab_qual_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_qual.png"),
vwidth = 1570, expand = 20
)
#print table
supp_tab_qual_gt| Table S4: Data quality for included regions and SIR estimates | |||||||||
| Regional Registry | LC Cases | Type of diagnostic confirmation for all LC | PYAR | Cases all SPC (%) | Cases SPLC (%) | Risk for SPLC - SIR (95% CI) | |||
|---|---|---|---|---|---|---|---|---|---|
DCO||Microscopic||Other 1 |
SIR1raw Female | SIR2sub Female | SIR1raw Male | SIR2sub Male | |||||
| Germany - ZfKD | |||||||||
| DE2 Bavaria | 33,340 | 88,402 | 1347 (4.0%) | 87 (0.3%) | 1.34 (0.84—2.03) | 1.86 (1.17—2.82) | 0.76 (0.59—0.97) | 1 (0.77—1.28) | |
| DE4 Brandenburg | 9,736 | 24,193 | 304 (3.1%) | 67 (0.7%) | 4.29 (2.5—6.86) | 5.85 (3.41—9.37) | 1.38 (1.02—1.81) | 1.84 (1.37—2.43) | |
| DE5 Bremen | 3,217 | 9,041 | 135 (4.2%) | 18 (0.6%) | 1.56 (0.51—3.64) | 2.27 (0.74—5.29) | 0.88 (0.47—1.5) | 1.22 (0.65—2.09) | |
| DE6 Hamburg | 6,929 | 17,525 | 305 (4.4%) | 7 (0.1%) | 0.46 (0.1—1.35) | 0.65 (0.13—1.89) | 0.17 (0.05—0.44) | 0.23 (0.06—0.59) | |
| DE8 Mecklenburg-Western Pomerania | 6,536 | 15,615 | 224 (3.4%) | 32 (0.5%) | 2.62 (1.06—5.41) | 3.64 (1.46—7.5) | 1 (0.65—1.48) | 1.34 (0.87—1.98) | |
| DE9 Lower Saxony | 28,736 | 72,266 | 1231 (4.3%) | 53 (0.2%) | 1.01 (0.56—1.66) | 1.41 (0.79—2.33) | 0.4 (0.28—0.55) | 0.54 (0.38—0.74) | |
| DEA3 Muenster | 10,011 | 25,068 | 400 (4.0%) | 77 (0.8%) | 4.68 (3.09—6.81) | 6.47 (4.26—9.41) | 1.27 (0.94—1.67) | 1.71 (1.27—2.25) | |
| DEC Saarland | 4,737 | 12,844 | 119 (2.5%) | 0 | 0 (0—1.16) | 0 (0—1.61) | 0 (0—0.17) | 0 (0—0.23) | |
| DED Saxony | 13,544 | 34,304 | 464 (3.4%) | 61 (0.5%) | 2.64 (1.32—4.73) | 3.66 (1.83—6.55) | 1.03 (0.76—1.36) | 1.39 (1.03—1.83) | |
| DEF Schleswig-Holstein | 11,404 | 29,085 | 531 (4.7%) | 97 (0.9%) | 4.43 (3.15—6.06) | 6.11 (4.35—8.35) | 1.52 (1.16—1.97) | 2.04 (1.55—2.63) | |
| DEG Thuringia | 7,382 | 19,498 | 235 (3.2%) | 43 (0.6%) | 3.36 (1.45—6.63) | 4.65 (2.01—9.16) | 1.25 (0.87—1.74) | 1.72 (1.2—2.39) | |
| United States - SEER | |||||||||
| SEER Reg 01 - San Francisco-Oakland SMSA | 11,863 | 32,582 | 592 (5.0%) | 240 (2.0%) | 4.69 (3.92—5.57) | 3.94 (3.09—4.94) | 3.75 (3.08—4.52) | 2.87 (2.19—3.7) | |
| SEER Reg 02 - Connecticut | 14,517 | 41,376 | 983 (6.8%) | 460 (3.2%) | 5.9 (5.22—6.65) | 5.1 (4.34—5.96) | 4.24 (3.66—4.89) | 3.5 (2.88—4.21) | |
| SEER Reg 20 - Detroit (Metropolitan) | 16,852 | 45,482 | 1175 (7.0%) | 511 (3.0%) | 5.49 (4.89—6.15) | 4.14 (3.52—4.83) | 3.61 (3.14—4.12) | 2.68 (2.21—3.21) | |
| SEER Reg 21 - Hawaii | 4,170 | 11,512 | 207 (5.0%) | 91 (2.2%) | 5.96 (4.33—8) | 4.52 (2.83—6.84) | 3.91 (2.87—5.2) | 2.82 (1.8—4.19) | |
| SEER Reg 22 - Iowa | 12,194 | 31,027 | 776 (6.4%) | 349 (2.9%) | 5.91 (5.03—6.89) | 5.24 (4.27—6.36) | 4.59 (3.96—5.3) | 3.82 (3.15—4.59) | |
| SEER Reg 23 - New Mexico | 4,730 | 12,325 | 169 (3.6%) | 65 (1.4%) | 3.19 (2.12—4.61) | 2.23 (1.22—3.75) | 3.53 (2.49—4.87) | 2.56 (1.57—3.96) | |
| SEER Reg 25 - Seattle (Puget Sound) | 14,891 | 39,455 | 985 (6.6%) | 429 (2.9%) | 5.89 (5.18—6.68) | 4.03 (3.34—4.83) | 4.35 (3.74—5.02) | 3.93 (3.26—4.69) | |
| SEER Reg 26 - Utah | 2,929 | 7,665 | 139 (4.7%) | 48 (1.6%) | 8.24 (5.22—12.36) | 5.15 (2.47—9.48) | 6.18 (4—9.12) | 4.01 (2.07—7) | |
| SEER Reg 27 - Atlanta (Metropolitan) | 7,989 | 21,413 | 479 (6.0%) | 218 (2.7%) | 6.76 (5.61—8.07) | 5.42 (4.21—6.85) | 4.57 (3.7—5.58) | 3.34 (2.5—4.39) | |
| SEER Reg 29 - Alaska Natives | 373 | 905 | 12 (3.2%) | 8 (2.1%) | 6.4 (2.08—14.93) | 3.7 (0.45—13.38) | 2.97 (0.61—8.68) | 4.37 (0.9—12.78) | |
| SEER Reg 31 - San Jose-Monterey | 5,430 | 15,129 | 285 (5.2%) | 104 (1.9%) | 5.8 (4.53—7.32) | 3.88 (2.65—5.48) | 2.89 (1.99—4.06) | 2.47 (1.51—3.82) | |
| SEER Reg 35 - Los Angeles | 20,707 | 57,262 | 1137 (5.5%) | 465 (2.2%) | 6.34 (5.61—7.15) | 4.85 (4.08—5.71) | 4 (3.46—4.6) | 3.05 (2.51—3.68) | |
| SEER Reg 37 - Rural Georgia | 633 | 1,549 | 35 (5.5%) | 11 (1.7%) | 5.03 (1.84—10.94) | 6.06 (1.97—14.15) | 2.22 (0.72—5.18) | 1.86 (0.38—5.45) | |
| SEER Reg 41 - California excluding SF/SJM/LA | 57,612 | 153,812 | 3236 (5.6%) | 1291 (2.2%) | 4.79 (4.45—5.15) | 3.62 (3.27—4) | 3.63 (3.34—3.95) | 2.58 (2.29—2.89) | |
| SEER Reg 42 - Kentucky | 26,986 | 67,443 | 1816 (6.7%) | 916 (3.4%) | 5.94 (5.41—6.5) | 4.74 (4.19—5.34) | 3.61 (3.28—3.95) | 2.78 (2.45—3.14) | |
| SEER Reg 44 - New Jersey | 35,900 | 101,006 | 2327 (6.5%) | 973 (2.7%) | 5.31 (4.88—5.76) | 4.57 (4.1—5.08) | 3.67 (3.32—4.05) | 3.07 (2.7—3.48) | |
| SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia | 26,046 | 65,650 | 1548 (5.9%) | 698 (2.7%) | 6.12 (5.49—6.8) | 4.82 (4.17—5.54) | 3.38 (3.04—3.75) | 2.71 (2.36—3.1) | |
| SIR Standardized incidence ratio; DCO death-certificate only; LC primary lung cancer; PYAR person-years at risk; SPC second primary cancer; SPLC second primary lung cancer | |||||||||
| 1 Microscopic diagnoses include cytology, and histology of the tumor. Other diagnoses include autopsy, clinical (without diagnostics), clinical with diagnostics, tumor markers and missing information on source of diagnosis. | |||||||||
#|eval: false
#|echo: false
# #example from gtsummary https://www.danieldsjoberg.com/gtsummary/articles/gallery.html#regression-tables-1
# gt_r1 <- glm(response ~ trt + grade, trial, family = binomial) %>%
# tbl_regression(exponentiate = TRUE)
# gt_r2 <- coxph(Surv(ttdeath, death) ~ trt + grade, trial) %>%
# tbl_regression(exponentiate = TRUE)
# gt_t1 <- trial[c("trt", "grade")] %>%
# tbl_summary(missing = "no") %>%
# add_n() %>%
# modify_header(stat_0 ~ "**n (%)**") %>%
# modify_footnote(stat_0 ~ NA_character_)
#
# theme_gtsummary_compact()
# tbl_merge(
# list(gt_t1, gt_r1, gt_r2),
# tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
#)supp_tab_subtypes_def_pre <-
d0_lung_wide_raw %>%
count(t_hist.1, t_histgroupiarc.1, t_sublungiarcgroup.1) %>%
select(-n)
#make table wider
supp_tab_subtypes_def <- supp_tab_subtypes_def_pre %>%
slice(1:50) %>%
bind_cols({supp_tab_subtypes_def_pre %>%
slice(51:100)}) %>%
bind_cols({supp_tab_subtypes_def_pre %>%
slice(101:150)}) %>%
bind_cols({supp_tab_subtypes_def_pre %>%
slice(151:200)}) %>%
janitor::clean_names()New names:
New names:
New names:
• `t_hist.1` -> `t_hist.1...1`
• `t_histgroupiarc.1` -> `t_histgroupiarc.1...2`
• `t_sublungiarcgroup.1` -> `t_sublungiarcgroup.1...3`
• `t_hist.1` -> `t_hist.1...4`
• `t_histgroupiarc.1` -> `t_histgroupiarc.1...5`
• `t_sublungiarcgroup.1` -> `t_sublungiarcgroup.1...6`
supp_tab_subtypes_def_gt <- supp_tab_subtypes_def %>%
#Start making gt table
gt::gt() %>%
#make header
gt::tab_header(
title = "Table S5: Conversion table of histology codes into ICD-O-3 histologically 'different' groups and histological subtypes of lung cancer",
subtitle = "") %>%
#rename columns
gt::cols_label(
starts_with("t_hist_1") ~ md("**Histology Code**"),
starts_with("t_histgroupiarc_1") ~ md("**Groups histologically ‘different’** (ICD-O-3 rev 1)"),
starts_with("t_sublungiarcgroup_1") ~ md("**Histological type of lung cancer** (IARC classification)")
)%>%
tab_source_note(
source_note = paste0("Notes: This classification is based on Fritz et al. 2013 [", ref_fritz_iarc, "] in combination with ICD-O-3 SEER Site/Histology Validation List 2015 to determine unusual codes for site Lung and Bronchus.")
) %>%
#special formatting
gt::sub_missing(
columns = everything(),
missing_text = "") %>%
##left align
gt::cols_align(
align = "left",
columns = contains("group")
) %>%
##column width
gt::cols_width(
starts_with("t_hist_1") ~ px(90),
starts_with("t_histgroupiarc_1") ~ px(248),
starts_with("t_sublungiarcgroup_1")~ px(235)
) %>%
#global table options
gt::opt_row_striping() %>% #add alternating stripes
gt::tab_options(data_row.padding = px(1))
#save table
supp_tab_subtypes_def_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_subtypes_def.png"),
vwidth = 3000, expand = 10
)
supp_tab_subtypes_def_gt %>%
gt_split(col_slice_at = "t_sublungiarcgroup_1_6") %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_subtypes_def.docx")
)
#print table
supp_tab_subtypes_def_gt %>%
gt_split(col_slice_at = "t_sublungiarcgroup_1_6")| Table S5: Conversion table of histology codes into ICD-O-3 histologically 'different' groups and histological subtypes of lung cancer | |||||
|---|---|---|---|---|---|
| Histology Code | Groups histologically ‘different’ (ICD-O-3 rev 1) | Histological type of lung cancer (IARC classification) | Histology Code | Groups histologically ‘different’ (ICD-O-3 rev 1) | Histological type of lung cancer (IARC classification) |
| 8000 | Unspecified types of cancer | Other & unspecified (O&U) | 8144 | Adenocarcinomas | Other & unspecified (O&U) |
| 8001 | Unspecified types of cancer | Other & unspecified (O&U) | 8145 | Adenocarcinomas | Other & unspecified (O&U) |
| 8002 | Unspecified types of cancer | Other & unspecified (O&U) | 8147 | Adenocarcinomas | Other & unspecified (O&U) |
| 8003 | Unspecified types of cancer | Other & unspecified (O&U) | 8154 | Other specific carcinomas | Other & unspecified (O&U) |
| 8004 | Unspecified types of cancer | Other & unspecified (O&U) | 8170 | Other specific carcinomas | Other & unspecified (O&U) |
| 8005 | Unspecified types of cancer | Other & unspecified (O&U) | 8190 | Adenocarcinomas | Other & unspecified (O&U) |
| 8010 | Unspecified carcinomas (NOS) | Large cell carcinoma (LCC) | 8200 | Adenocarcinomas | Other & unspecified (O&U) |
| 8011 | Unspecified carcinomas (NOS) | Large cell carcinoma (LCC) | 8201 | Adenocarcinomas | Other & unspecified (O&U) |
| 8012 | Unspecified carcinomas (NOS) | Large cell carcinoma (LCC) | 8210 | Adenocarcinomas | Other & unspecified (O&U) |
| 8013 | Unspecified carcinomas (NOS) | Other & unspecified (O&U) | 8211 | Adenocarcinomas | Adenocarcinoma (AC) |
| 8014 | Unspecified carcinomas (NOS) | Large cell carcinoma (LCC) | 8230 | Other specific carcinomas | Adenocarcinoma (AC) |
| 8015 | Unspecified carcinomas (NOS) | Large cell carcinoma (LCC) | 8231 | Other specific carcinomas | Adenocarcinoma (AC) |
| 8020 | Unspecified carcinomas (NOS) | Large cell carcinoma (LCC) | 8240 | Other specific carcinomas | Other & unspecified (O&U) |
| 8021 | Unspecified carcinomas (NOS) | Large cell carcinoma (LCC) | 8241 | Other specific carcinomas | Other & unspecified (O&U) |
| 8022 | Unspecified carcinomas (NOS) | Large cell carcinoma (LCC) | 8243 | Other specific carcinomas | Other & unspecified (O&U) |
| 8030 | Other specific carcinomas | Large cell carcinoma (LCC) | 8244 | Other specific carcinomas | Other & unspecified (O&U) |
| 8031 | Other specific carcinomas | Large cell carcinoma (LCC) | 8245 | Other specific carcinomas | Other & unspecified (O&U) |
| 8032 | Other specific carcinomas | Other & unspecified (O&U) | 8246 | Other specific carcinomas | Other & unspecified (O&U) |
| 8033 | Other specific carcinomas | Other & unspecified (O&U) | 8247 | Other specific carcinomas | Other & unspecified (O&U) |
| 8034 | Other specific carcinomas | Other & unspecified (O&U) | 8249 | Other specific carcinomas | Other & unspecified (O&U) |
| 8035 | Other specific carcinomas | Large cell carcinoma (LCC) | 8250 | Other specific carcinomas | Adenocarcinoma (AC) |
| 8040 | Other specific carcinomas | Other & unspecified (O&U) | 8251 | Other specific carcinomas | Adenocarcinoma (AC) |
| 8041 | Other specific carcinomas | Small cell carcinoma (SCLC) | 8252 | Other specific carcinomas | Adenocarcinoma (AC) |
| 8042 | Other specific carcinomas | Small cell carcinoma (SCLC) | 8253 | Other specific carcinomas | Adenocarcinoma (AC) |
| 8043 | Other specific carcinomas | Small cell carcinoma (SCLC) | 8254 | Other specific carcinomas | Adenocarcinoma (AC) |
| 8044 | Other specific carcinomas | Small cell carcinoma (SCLC) | 8255 | Other specific carcinomas | Adenocarcinoma (AC) |
| 8045 | Other specific carcinomas | Small cell carcinoma (SCLC) | 8260 | Adenocarcinomas | Adenocarcinoma (AC) |
| 8046 | Other specific carcinomas | Other & unspecified (O&U) | 8263 | Adenocarcinomas | Other & unspecified (O&U) |
| 8050 | Unspecified carcinomas (NOS) | Squamous cell carcinoma (SCC) | 8290 | Adenocarcinomas | Other & unspecified (O&U) |
| 8051 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8310 | Adenocarcinomas | Large cell carcinoma (LCC) |
| 8052 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8320 | Adenocarcinomas | Other & unspecified (O&U) |
| 8070 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8323 | Adenocarcinomas | Adenocarcinoma (AC) |
| 8071 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8332 | Adenocarcinomas | Other & unspecified (O&U) |
| 8072 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8333 | Adenocarcinomas | Other & unspecified (O&U) |
| 8073 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8340 | Other specific carcinomas | Other & unspecified (O&U) |
| 8074 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8341 | Other specific carcinomas | Other & unspecified (O&U) |
| 8075 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8345 | Other specific carcinomas | Other & unspecified (O&U) |
| 8076 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8350 | Adenocarcinomas | Other & unspecified (O&U) |
| 8078 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8401 | Adenocarcinomas | Other & unspecified (O&U) |
| 8082 | Squamous carcinomas | Other & unspecified (O&U) | 8410 | Adenocarcinomas | Other & unspecified (O&U) |
| 8083 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8430 | Adenocarcinomas | Other & unspecified (O&U) |
| 8084 | Squamous carcinomas | Squamous cell carcinoma (SCC) | 8440 | Adenocarcinomas | Other & unspecified (O&U) |
| 8090 | Basal cell carcinomas | Other & unspecified (O&U) | 8441 | Adenocarcinomas | Other & unspecified (O&U) |
| 8094 | Basal cell carcinomas | Other & unspecified (O&U) | 8460 | Adenocarcinomas | Other & unspecified (O&U) |
| 8095 | Basal cell carcinomas | Other & unspecified (O&U) | 8470 | Adenocarcinomas | Other & unspecified (O&U) |
| 8120 | Squamous carcinomas | Other & unspecified (O&U) | 8471 | Adenocarcinomas | Other & unspecified (O&U) |
| 8123 | Squamous carcinomas | Other & unspecified (O&U) | 8480 | Adenocarcinomas | Adenocarcinoma (AC) |
| 8140 | Adenocarcinomas | Adenocarcinoma (AC) | 8481 | Adenocarcinomas | Adenocarcinoma (AC) |
| 8141 | Adenocarcinomas | Other & unspecified (O&U) | 8490 | Adenocarcinomas | Adenocarcinoma (AC) |
| 8143 | Adenocarcinomas | Other & unspecified (O&U) | 8500 | Adenocarcinomas | Other & unspecified (O&U) |
| Notes: This classification is based on Fritz et al. 2013 [11] in combination with ICD-O-3 SEER Site/Histology Validation List 2015 to determine unusual codes for site Lung and Bronchus. | |||||
| Table S5: Conversion table of histology codes into ICD-O-3 histologically 'different' groups and histological subtypes of lung cancer | |||||
|---|---|---|---|---|---|
| Histology Code | Groups histologically ‘different’ (ICD-O-3 rev 1) | Histological type of lung cancer (IARC classification) | Histology Code | Groups histologically ‘different’ (ICD-O-3 rev 1) | Histological type of lung cancer (IARC classification) |
| 8503 | Adenocarcinomas | Other & unspecified (O&U) | 8901 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8507 | Adenocarcinomas | Other & unspecified (O&U) | 8902 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8525 | Adenocarcinomas | Other & unspecified (O&U) | 8910 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8530 | Adenocarcinomas | Other & unspecified (O&U) | 8912 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8550 | Adenocarcinomas | Adenocarcinoma (AC) | 8920 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8551 | Adenocarcinomas | Adenocarcinoma (AC) | 8921 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8560 | Other specific carcinomas | Other & unspecified (O&U) | 8933 | Other specified types of cancer | Unusual |
| 8562 | Other specific carcinomas | Other & unspecified (O&U) | 8935 | Other specified types of cancer | Other & unspecified (O&U) |
| 8570 | Adenocarcinomas | Adenocarcinoma (AC) | 8940 | Adenocarcinomas | Unusual |
| 8571 | Adenocarcinomas | Adenocarcinoma (AC) | 8941 | Adenocarcinomas | Other & unspecified (O&U) |
| 8572 | Adenocarcinomas | Adenocarcinoma (AC) | 8951 | Other specified types of cancer | Other & unspecified (O&U) |
| 8574 | Adenocarcinomas | Adenocarcinoma (AC) | 8963 | Other specified types of cancer | Unusual |
| 8575 | Adenocarcinomas | Other & unspecified (O&U) | 8972 | Other specified types of cancer | Other & unspecified (O&U) |
| 8576 | Adenocarcinomas | Adenocarcinoma (AC) | 8973 | Other specified types of cancer | Other & unspecified (O&U) |
| 8580 | Other specific carcinomas | Other & unspecified (O&U) | 8980 | Other specified types of cancer | Other & unspecified (O&U) |
| 8581 | Other specific carcinomas | Unusual | 8982 | Other specified types of cancer | Other & unspecified (O&U) |
| 8585 | Other specific carcinomas | Other & unspecified (O&U) | 8990 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8680 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9015 | Other specified types of cancer | Other & unspecified (O&U) |
| 8710 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9040 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8711 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9041 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8720 | Other specified types of cancer | Other & unspecified (O&U) | 9043 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8743 | Other specified types of cancer | Other & unspecified (O&U) | 9064 | Other specified types of cancer | Other & unspecified (O&U) |
| 8770 | Other specified types of cancer | Other & unspecified (O&U) | 9065 | Other specified types of cancer | Other & unspecified (O&U) |
| 8772 | Other specified types of cancer | Other & unspecified (O&U) | 9070 | Other specified types of cancer | Other & unspecified (O&U) |
| 8800 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9071 | Other specified types of cancer | Other & unspecified (O&U) |
| 8801 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9080 | Other specified types of cancer | Other & unspecified (O&U) |
| 8802 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9084 | Other specified types of cancer | Other & unspecified (O&U) |
| 8803 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9085 | Other specified types of cancer | Other & unspecified (O&U) |
| 8804 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9100 | Other specified types of cancer | Other & unspecified (O&U) |
| 8805 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9101 | Other specified types of cancer | Other & unspecified (O&U) |
| 8806 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9120 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8810 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9130 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8811 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9133 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8815 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9150 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8824 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9180 | Sarcomas and soft tissue tumours | Unusual |
| 8830 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9181 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8840 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9182 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8850 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9220 | Sarcomas and soft tissue tumours | Unusual |
| 8851 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9231 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8852 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9250 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8853 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9260 | Other specified types of cancer | Unusual |
| 8854 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9364 | Other specified types of cancer | Other & unspecified (O&U) |
| 8855 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9365 | Other specified types of cancer | Other & unspecified (O&U) |
| 8858 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9370 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8890 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9473 | Other specified types of cancer | Other & unspecified (O&U) |
| 8891 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9500 | Other specified types of cancer | Other & unspecified (O&U) |
| 8894 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9522 | Other specified types of cancer | Other & unspecified (O&U) |
| 8895 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9540 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8896 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9560 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| 8900 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) | 9561 | Sarcomas and soft tissue tumours | Other & unspecified (O&U) |
| Notes: This classification is based on Fritz et al. 2013 [11] in combination with ICD-O-3 SEER Site/Histology Validation List 2015 to determine unusual codes for site Lung and Bronchus. | |||||
# gtExtras::gt_two_column_layout(list(supp_tab_subtypes_def_gt_pt1, supp_tab_subtypes_def_gt_pt2),
# output = "save",
# filename = file.path(output_dir_tables, "supp_tab_subtypes_def.png"),
# vwidth = 1160, expand = 20)To estimate the size of bias introduced by using general population reference rates for calculating SIR of same-site SPC when IARC/IACR MP rules are applied, we simulate various scenarios. First, we assume that the baseline risk of LC survivors to develop an SPLC is the same as for the general population (real SIR = 1.0). We determined the proportions of histologically different LC groups \({p_{hist}}_j\) in the analysis dataset for all index LC cases aged 30 to 99 years and excluded death certificate only (DCO) diagnoses. Then we assumed that the SPLC would have the same histology group distribution as for the first cancer. We expect the true SIR to be the fraction of observed and expected cases. In the case of the no risk difference between LC survivors and the general population \(SIR_{real}\), the count of observed cases \(count_i\) equals the number of expected cases (as the product of person-years at risk \(pyears_i\) and general population reference rates \(IR_i\)) for each specific stratum \(i\). We always stratified SIR in our analyses by age, sex, region, and period using stratum-specific reference rates for the general population.
\[SIR = \frac{O}{E} = \frac{\sum_{i=1}^{I}O_i}{\sum_{i=1}^{I}E_i} = \frac{\sum_{i=1}^{I}count_i}{\sum_{i=1}^{I}pyars_i*IR_i}\] \[SIR_{real}(1.0) = \frac{\sum_{i=1}^{I}1 * E_i}{\sum_{i=1}^{I}E_i}\]
Then we take into account that there is a correction factor \(x_{hist}\) for combinations of LC and SPLC that are not possible in our observed cases according to IARC/IACR MP rules. If we assume that the SPLC would have the same histology group distribution as for the first cancer and any histology group A can only be followed by a histology group, not A, then the correction factor is \(1-{p_{hist}}_A\). This gives for the simulated SIR under IARC/IACR rules:
\[SIR_{simIARC}(SIR_{real}=1.0) = \frac{O}{E} = \frac{\sum_{j=1}^{J}\sum_{i=1}^{I} 1* E_{ij}*{x_{hist}}_{j}}{\sum_{j=1}^{J}\sum_{i=1}^{I}E_{ij}}\]
Whereby
\[{x_{hist}}_{j} = 1-{p_{hist}}_j\]
The factor \({x_{hist}}_j\) is sex- and histology-specific, but the same for all age-groups and regions.
Generalized for any given \(SIR_{real}\), the simulation would give
\[SIR_{simIARC} = \frac{\sum_{j=1}^{J}\sum_{i=1}^{I} SIR_{real}* pyears_{ij}*IR_{ij}*{x_{hist}}_{j}}{\sum_{j=1}^{J}\sum_{i=1}^{I}pyears_{ij}*IR_{ij}}\]
Additionally to the scenario of no risk difference (\(SIR_{real} = 1.0\)), we also simulate a true doubling of SPLC risk for LC survivors (\(SIR_{real} = 2.0\)) and a risk increase comparable to data of U.S. lung cancer survivors for males (\(SIR_{real} = 3.38\)) and females (\(SIR_{real} = 4.85\)) published by Thakur et al. [@thakurRiskSecondLung2018].
supp_fig_histsupp_tab_samehist <- res_same_hist_histgroupiarc %>%
summarize(n_hist_same = sum(same_hist == "same type"),
n_hist_diff = sum(same_hist == "different type"),
n_splc = n(),
.by = c(reg.1, p_region.1)) %>%
mutate(freq_hist_same = n_hist_same / n_splc,
freq_hist_diff = n_hist_diff / n_splc,
freq_splc = 1,
reg.1 = case_match(reg.1,
"zfkd" ~ "Analysis Dataset – Germany (11 PBCR)",
"seer" ~ "Validation Dataset – United States (17 PBCR)"))
supp_tab_samehist_gt <- supp_tab_samehist %>%
gt() %>%
cols_hide(reg.1) %>%
#Column labelling
gt::cols_label(
p_region.1 = md("Regional Registry"),
n_hist_same = md("Same group (%)"),
n_hist_diff = md("Different group (%)"),
n_splc = md("Total (%)"),
)%>%
#gt: Define row groups -> careful: you need to add groups in reverse order... so bottom group first
gt::tab_row_group(
label = md("**Validation Dataset – United States (17 PBCR)**"),
rows = c(1:17)
) %>%
gt::tab_row_group(
label = md("**Analysis Dataset – Germany (11 PBCR)**"),
rows = c(18:27)
) %>%
gt::fmt_percent(
columns = starts_with("freq"),
decimals = 1
) %>%
gt::cols_merge_n_pct(
col_n = c(n_hist_same),
col_pct = c(freq_hist_same)
) %>%
gt::cols_merge_n_pct(
col_n = c(n_hist_diff),
col_pct = c(freq_hist_diff)
) %>%
gt::cols_merge_n_pct(
col_n = c(n_splc),
col_pct = c(freq_splc)
) %>%
#make header
gt::tab_header(
title = paste0("Table S8: Frequency of same-histology SPLC by region"),
subtitle = paste0("")) %>%
#footnotes
#footnotes
tab_source_note(
source_note = paste0("Groups of malignant neoplasms considered to be histologically ‘different’ according to IARC ICD-O-3, revision 1 (2013).")
) %>%
#special formatting
##make column labels bold
gt::tab_style(
style = cell_text(weight = "bold"),
locations =
cells_column_labels(everything())
) %>%
#global table options
gt::opt_row_striping() %>% #add alternating stripes
gt::tab_options(data_row.padding = px(2)) %>% # reduce row height
##column width
gt::cols_width(
p_region.1 ~ px(300),
contains("n_") ~ px(160)
)
#save table
supp_tab_samehist_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_samehist.png"),
vwidth = 1000, expand = 10
)
#print table
supp_tab_samehist_gt| Table S8: Frequency of same-histology SPLC by region | |||
|---|---|---|---|
| Regional Registry | Same group (%) | Different group (%) | Total (%) |
| Analysis Dataset – Germany (11 PBCR) | |||
| DEF Schleswig-Holstein | 0 | 97 (100.0%) | 97 (100.0%) |
| DE6 Hamburg | 0 | 7 (100.0%) | 7 (100.0%) |
| DE9 Lower Saxony | 0 | 53 (100.0%) | 53 (100.0%) |
| DE5 Bremen | 0 | 18 (100.0%) | 18 (100.0%) |
| DEA3 Muenster | 0 | 77 (100.0%) | 77 (100.0%) |
| DE2 Bavaria | 0 | 87 (100.0%) | 87 (100.0%) |
| DE4 Brandenburg | 0 | 67 (100.0%) | 67 (100.0%) |
| DED Saxony | 0 | 61 (100.0%) | 61 (100.0%) |
| DEG Thuringia | 0 | 43 (100.0%) | 43 (100.0%) |
| DE8 Mecklenburg-Western Pomerania | 0 | 32 (100.0%) | 32 (100.0%) |
| Validation Dataset – United States (17 PBCR) | |||
| SEER Reg 01 - San Francisco-Oakland SMSA | 107 (44.6%) | 133 (55.4%) | 240 (100.0%) |
| SEER Reg 02 - Connecticut | 188 (40.9%) | 272 (59.1%) | 460 (100.0%) |
| SEER Reg 20 - Detroit (Metropolitan) | 237 (46.4%) | 274 (53.6%) | 511 (100.0%) |
| SEER Reg 21 - Hawaii | 45 (49.5%) | 46 (50.5%) | 91 (100.0%) |
| SEER Reg 22 - Iowa | 134 (38.4%) | 215 (61.6%) | 349 (100.0%) |
| SEER Reg 23 - New Mexico | 31 (47.7%) | 34 (52.3%) | 65 (100.0%) |
| SEER Reg 25 - Seattle (Puget Sound) | 190 (44.3%) | 239 (55.7%) | 429 (100.0%) |
| SEER Reg 26 - Utah | 26 (54.2%) | 22 (45.8%) | 48 (100.0%) |
| SEER Reg 27 - Atlanta (Metropolitan) | 97 (44.5%) | 121 (55.5%) | 218 (100.0%) |
| SEER Reg 29 - Alaska Natives | 3 (37.5%) | 5 (62.5%) | 8 (100.0%) |
| SEER Reg 31 - San Jose-Monterey | 52 (50.0%) | 52 (50.0%) | 104 (100.0%) |
| SEER Reg 35 - Los Angeles | 214 (46.0%) | 251 (54.0%) | 465 (100.0%) |
| SEER Reg 37 - Rural Georgia | 3 (27.3%) | 8 (72.7%) | 11 (100.0%) |
| SEER Reg 41 - California excluding SF/SJM/LA | 607 (47.0%) | 684 (53.0%) | 1291 (100.0%) |
| SEER Reg 42 - Kentucky | 389 (42.5%) | 527 (57.5%) | 916 (100.0%) |
| SEER Reg 44 - New Jersey | 388 (39.9%) | 585 (60.1%) | 973 (100.0%) |
| SEER Reg 47 - Georgia excluding Atlanta/Rural Georgia | 287 (41.1%) | 411 (58.9%) | 698 (100.0%) |
| Groups of malignant neoplasms considered to be histologically ‘different’ according to IARC ICD-O-3, revision 1 (2013). | |||
#first overall and LC hist subtype results
sensa_tab3_pt1 <- sensa_res_sum_sir %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "t_sublungiarcgroup.1",
break_value = t_sublungiarcgroup.1) %>%
select(-t_sublungiarcgroup.1) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")
#second by age_group results
sensa_tab3_pt2 <- sensa_res_sum_sir_byage %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "p_agefcgroup",
break_value = p_agefcgroup) %>%
select(-t_sublungiarcgroup.1, -p_agefcgroup, -pyar) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")
#third by year_group results
sensa_tab3_pt3 <- sensa_res_sum_sir_byyear %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "p_yearfcgroup",
break_value = p_yearfcgroup) %>%
select(-t_sublungiarcgroup.1, -p_yearfcgroup, -pyar) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")
sensa_tab3 <- sensa_tab3_pt1 %>%
bind_rows(sensa_tab3_pt2) %>%
bind_rows(sensa_tab3_pt3) %>%
mutate(zfkd.plot = zfkd.sir1_raw.sir) %>%
#add columes of GER main analysis for reference
bind_cols({tab3 %>% select(mazfkd.sir1_raw.sir = zfkd.sir1_raw.sir, mazfkd.sir2_sub.sir = zfkd.sir2_sub.sir, mazfkd.sir2_sub.sir_lci = zfkd.sir2_sub.sir_lci, mazfkd.sir2_sub.sir_uci = zfkd.sir2_sub.sir_uci)}) %>%
#calculate deltas
mutate(diff.sir1_raw.diff = zfkd.sir1_raw.sir - mazfkd.sir1_raw.sir,
diff.sir2_sub.diff = zfkd.sir2_sub.sir - mazfkd.sir2_sub.sir,
.before = mazfkd.sir1_raw.sir) %>%
#add columns of US Validation data for reference
bind_cols({tab3 %>% select(seer.sir1_raw.sir, seer.sir2_sub.sir, seer.sir2_sub.sir_lci, seer.sir2_sub.sir_uci)})sensa_tab3_title <- md("S9. Table: Sensitivity analysis A – Risk for SPLC using unadjusted and histology-specific SIR method<br>[restricted to six German PBCR with low DCO rate]")
sensa_tab3_subtitle <- "Comparing results for Germany (IARC/IACR MP rules) and United States (Verification dataset - SEER MP rules)"
sensa_tab3_source_note <- md(paste0(
"Notes: ",
"The six included registries are Brandenburg 2007 to 2014, Bremen 2004 to 2014, Hamburg 2008 to 2014, Mecklenburg-Western Pomerania 2003 to 2011, Saarland 2002 to 2011 and Saxony 2005 to 2014. <br>",
"O<sub>SIR1</sub> number of cases observed in the data for SIR1<sub>raw</sub>; ",
"O<sub>SIR2</sub> number of cases observed in the data for SIR2<sub>sub</sub>, ZfKD data O<sub>SIR1</sub> = O<sub>SIR2</sub>; ",
"SEER Surveillance, Epidemiology, and End Results Program; ",
if(en_gb){"SIR standardised incidence ratio; "}else{"SIR standardized incidence ratio; "},
"SIR1<sub>raw</sub> unadjusted SIR using age-, sex-, region-, period-specific reference rates; ",
"SIR2<sub>sub</sub> histological subtype-specific SIR using age-, sex-, region-, period- and histology-specific reference rates and excluding same-histology group SPLC from observed and expected; ",
"SIR3<sub>IARC</sub> unadjusted SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR1<sub>raw</sub> = SIR3<sub>IARC</sub>; ",
"SIR4<sub>subIARC</sub> histological subtype-specific SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR2<sub>sub</sub> = SIR4<sub>subIARC</sub>; ",
"SPLC second primary lung cancer; ",
"x censored counts of observed smaller than 5 for data privacy reasons; ",
"ZfKD German Centre for Cancer Registry Data"))
supp_tab_sensa_gt <- sensa_tab3 %>%
gt() %>%
cols_hide(c(any_of(c("t_site", "fu_time", "fu_time_sort",
"sex", "break_var")),
ends_with(c("uci", "expected")),
ends_with(c("sir3_iarc.observed", "sir4_subiarc.observed")),
ends_with(c("sir1_raw.sir_lci", "sir3_iarc.sir_lci", "sir4_subiarc.sir_lci")),
contains(c("zfkd.sir3", "zfkd.sir4", "zfkd.sir1_raw.observed")),
)) %>%
#make header
gt::tab_header(
title = sensa_tab3_title,
subtitle = sensa_tab3_subtitle) %>%
#rename columns
gt::cols_label(
contains("break_var") ~ "",
contains("break_value") ~ "",
contains("plot") ~ "",
ends_with(".sir_lci") ~ md("95% CI<sub>SIR2</sub>"),
ends_with(".expected") ~ "E",
ends_with("sir1_raw.observed") ~ md("O<sub>SIR1</sub>"),
ends_with("sir2_sub.observed") ~ md("O<sub>SIR2</sub>"),
ends_with(".sir1_raw.sir") ~ md("SIR1<sub>raw</sub>"),
ends_with(".sir2_sub.sir") ~ md("**SIR2<sub>sub</sub>**"),
ends_with(".sir3_iarc.sir") ~ md("SIR3<sub>IARC</sub>"),
ends_with(".sir4_subiarc.sir") ~ md("SIR4<sub>subIARC</sub>"),
ends_with("diff.sir1_raw.diff") ~ md("Δ SIR1<sub>raw</sub>"),
ends_with("diff.sir2_sub.diff") ~ md("**Δ SIR2<sub>sub</sub>**")
) %>%
#make col groups (spanner)
tab_spanner(
label = md("**Germany (6 of 11 regions)**<br>(Sensitivity dataset - IARC/IACR MP rules)"),
columns = c(zfkd.sir1_raw.sir,
zfkd.sir2_sub.sir,
zfkd.sir2_sub.sir_lci,
zfkd.sir2_sub.observed,
zfkd.plot),
id = "german_spanner"
) %>%
tab_spanner(
label = md("**Difference to main analysis**"),
columns = c(diff.sir1_raw.diff,
diff.sir2_sub.diff),
id = "diff_spanner"
) %>%
tab_spanner(
label = md("**Germany (all regions)**<br>(Main analysis dataset - IARC/IACR MP rules)"),
columns = c(mazfkd.sir1_raw.sir,
mazfkd.sir2_sub.sir,
mazfkd.sir2_sub.sir_lci
),
id = "magerman_spanner"
) %>%
tab_spanner(
label = md("**United States**<br>(Validation dataset - SEER MP rules)"),
columns = c(seer.sir1_raw.sir,
seer.sir2_sub.sir,
seer.sir2_sub.sir_lci),
id = "us_spanner"
) %>%
gt::rows_add(sex = "female_header", .before = 1) %>%
gt::rows_add(sex = "male_header", .before = 1) %>%
#make row groups
gt::tab_row_group(
label = "",
rows = (sex == "female_header"),
id = "female"
) %>%
gt::tab_row_group(
label = md("**Females**"),
rows = (break_value == "Total - All lung cancers" & sex == "Female"),
id = "female_tot"
) %>%
gt::tab_row_group(
label = "Histology of LC",
rows = (break_var == "t_sublungiarcgroup.1" & break_value != "Total - All lung cancers" & sex == "Female"),
id = "female_sub"
) %>%
gt::tab_row_group(
label = "Age at diagnosis of LC",
rows = (break_var == "p_agefcgroup" & sex == "Female"),
id = "female_age"
) %>%
gt::tab_row_group(
label = "Year of diagnosis of LC",
rows = (break_var == "p_yearfcgroup" & sex == "Female"),
id = "female_year"
) %>%
#make row groups
gt::tab_row_group(
label = "",
rows = (sex == "male_header"),
id = "male"
) %>%
gt::tab_row_group(
label = md("**Males**"),
rows = (break_value == "Total - All lung cancers" & sex == "Male"),
id = "male_tot"
) %>%
gt::tab_row_group(
label = "Histology of LC",
rows = (break_var == "t_sublungiarcgroup.1" & break_value != "Total - All lung cancers" & sex == "Male"),
id = "male_sub"
) %>%
gt::tab_row_group(
label = "Age at diagnosis of LC",
rows = (break_var == "p_agefcgroup" & sex == "Male"),
id = "male_age"
) %>%
gt::tab_row_group(
label = "Year of diagnosis of LC",
rows = (break_var == "p_yearfcgroup" & sex == "Male"),
id = "male_year"
) %>%
row_group_order(groups = c("female", "female_tot", "female_sub", "female_age", "female_year",
"male", "male_tot", "male_sub", "male_age", "male_year")) %>%
#column formatting
gt::fmt_number(
columns = contains(c("pyar", "observed", "n_base")),
decimals = 0
) %>%
gt::fmt_number(
columns = contains(c("expected")),
decimals = 1
) %>%
gt::fmt_number(
columns = ends_with(c(".sir", ".sir_lci", ".sir_uci", ".diff")),
decimals = 2
) %>%
gt::sub_missing(
columns = everything(),
missing_text = ""
) %>%
#censor small values
sub_small_vals(
columns = zfkd.sir2_sub.observed,
rows = everything(),
threshold = 5,
small_pattern = "x") %>%
cols_merge_range(
col_begin = zfkd.sir2_sub.sir_lci,
col_end = zfkd.sir2_sub.sir_uci
) %>%
cols_merge_range(
col_begin = mazfkd.sir2_sub.sir_lci,
col_end = mazfkd.sir2_sub.sir_uci
) %>%
cols_merge_range(
col_begin = seer.sir2_sub.sir_lci,
col_end = seer.sir2_sub.sir_uci,
) %>%
#plotted columns
plot_gt_sircomp_dotplot(var1 = zfkd.plot, var2 = zfkd.sir2_sub.sir, var3 = seer.sir2_sub.sir,
col1 = colors_4_method[1], col2 = colors_4_method[2], col3 = colors_4_method[3],
label_x1 = x1, label_x2 = x2, label_x3 = "US",
x_min = 0.5, x_max = 10, width = 70) %>%
tab_source_note(
source_note = sensa_tab3_source_note
) %>%
#special formatting
##make column and row group labels bold
gt::tab_style(
style = cell_text(weight = "bold"),
locations = list(
cells_body(columns = c(zfkd.sir2_sub.sir,
diff.sir2_sub.diff,
mazfkd.sir2_sub.sir,
seer.sir2_sub.sir))
)
) %>%
gt:: cols_width(
break_value ~ px(240),
zfkd.sir1_raw.sir ~ px(65),
zfkd.sir2_sub.sir ~ px(65),
zfkd.sir2_sub.sir_lci ~ px(87),
zfkd.sir2_sub.observed ~ px(42),
zfkd.plot ~ px(250),
diff.sir1_raw.diff ~ px(85),
diff.sir2_sub.diff ~ px(85),
mazfkd.sir1_raw.sir ~ px(65),
mazfkd.sir2_sub.sir ~ px(65),
mazfkd.sir2_sub.sir_lci ~ px(87),
seer.sir1_raw.sir ~ px(65),
seer.sir2_sub.sir ~ px(65),
seer.sir2_sub.sir_lci ~ px(95),
) %>%
#global table options
gt::opt_row_striping() %>% #add alternating stripes
gt::tab_options(data_row.padding = px(3), # reduce row height
row_group.padding = px(8), # reduce row height
stub.border.width = px(20), # increase space between column stubs
row.striping.include_stub = TRUE)
#output table
supp_tab_sensa_gtWarning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
| S9. Table: Sensitivity analysis A – Risk for SPLC using unadjusted and histology-specific SIR method [restricted to six German PBCR with low DCO rate] |
|||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Comparing results for Germany (IARC/IACR MP rules) and United States (Verification dataset - SEER MP rules) | |||||||||||||
| Germany (6 of 11 regions) (Sensitivity dataset - IARC/IACR MP rules) |
Difference to main analysis | Germany (all regions) (Main analysis dataset - IARC/IACR MP rules) |
United States (Validation dataset - SEER MP rules) |
||||||||||
| SIR1raw | SIR2sub | 95% CISIR2 | OSIR2 | Δ SIR1raw | Δ SIR2sub | SIR1raw | SIR2sub | 95% CISIR2 | SIR1raw | SIR2sub | 95% CISIR2 | ||
| Females | |||||||||||||
| Total - All lung cancers | 1.64 | 2.30 | 1.50–3.37 | 26 | −0.50 | −0.68 | 2.14 | 2.98 | 2.53–3.49 | 5.52 | 4.37 | 4.18–4.56 | |
| Histology of LC | |||||||||||||
| Adenocarcinoma (AC) | 1.05 | 1.58 | 0.68–3.11 | 8 | −0.64 | −0.95 | 1.69 | 2.53 | 1.91–3.28 | 6.08 | 4.48 | 4.20–4.76 | |
| Large cell carcinoma (LCC) | 1.44 | 1.69 | 0.04–9.40 | x | 1.20 | 1.40 | 0.24 | 0.29 | 0.01–1.60 | 4.04 | 3.95 | 3.27–4.73 | |
| Other & unspecified (O&U) | 1.18 | 1.67 | 0.34–4.88 | x | −0.10 | −0.13 | 1.28 | 1.80 | 0.98–3.01 | 4.04 | 3.88 | 3.49–4.30 | |
| Small cell carcinoma (SCLC) | 2.92 | 4.29 | 1.58–9.35 | 6 | 0.49 | 0.72 | 2.43 | 3.57 | 2.26–5.35 | 4.26 | 4.51 | 3.79–5.32 | |
| Squamous cell carcinoma (SCC) | 2.71 | 3.25 | 1.41–6.41 | 8 | −1.64 | −1.92 | 4.35 | 5.17 | 3.94–6.67 | 6.50 | 4.66 | 4.26–5.09 | |
| Age at diagnosis of LC | |||||||||||||
| 30 - 49 | 5.92 | 9.32 | 1.13–33.68 | x | 1.72 | 2.93 | 4.20 | 6.39 | 2.57–13.17 | 37.95 | 32.88 | 26.97–39.70 | |
| 50 - 59 | 3.28 | 4.95 | 2.14–9.75 | 8 | −0.28 | −0.27 | 3.56 | 5.22 | 3.79–7.00 | 14.29 | 12.06 | 10.85–13.37 | |
| 60 - 69 | 1.28 | 1.82 | 0.73–3.75 | 7 | −1.09 | −1.53 | 2.37 | 3.35 | 2.59–4.26 | 7.23 | 5.94 | 5.54–6.36 | |
| 70 - 79 | 1.50 | 2.05 | 0.94–3.89 | 9 | 0.16 | 0.24 | 1.34 | 1.81 | 1.24–2.55 | 3.90 | 3.06 | 2.83–3.30 | |
| 80+ | 0.00 | 0.00 | 0.00–2.96 | 0 | −0.82 | −1.06 | 0.82 | 1.06 | 0.34–2.47 | 2.05 | 1.55 | 1.30–1.83 | |
| Year of diagnosis of LC | |||||||||||||
| 2002 - 2005 | 1.00 | 1.37 | 0.28–4.01 | x | −1.27 | −1.70 | 2.27 | 3.07 | 2.31–3.99 | 5.71 | 4.44 | 4.15–4.74 | |
| 2006 - 2009 | 1.76 | 2.46 | 1.31–4.21 | 13 | −0.15 | −0.20 | 1.91 | 2.66 | 1.99–3.48 | 5.61 | 4.48 | 4.18–4.80 | |
| 2010 - 2013 | 1.82 | 2.60 | 1.25–4.78 | 10 | −0.49 | −0.73 | 2.31 | 3.33 | 2.44–4.44 | 5.00 | 4.02 | 3.63–4.44 | |
| Males | |||||||||||||
| Total - All lung cancers | 0.73 | 0.99 | 0.78–1.23 | 80 | −0.12 | −0.16 | 0.85 | 1.15 | 1.03–1.27 | 3.77 | 2.94 | 2.81–3.08 | |
| Histology of LC | |||||||||||||
| Adenocarcinoma (AC) | 0.77 | 1.03 | 0.68–1.50 | 27 | −0.16 | −0.19 | 0.93 | 1.22 | 1.02–1.45 | 4.19 | 3.13 | 2.91–3.37 | |
| Large cell carcinoma (LCC) | 0.00 | 0.00 | 0.00–0.76 | 0 | −0.04 | −0.04 | 0.04 | 0.04 | 0.00–0.24 | 2.79 | 2.92 | 2.42–3.48 | |
| Other & unspecified (O&U) | 0.53 | 0.68 | 0.27–1.41 | 7 | −0.27 | −0.36 | 0.80 | 1.04 | 0.74–1.41 | 2.68 | 2.58 | 2.29–2.89 | |
| Small cell carcinoma (SCLC) | 0.96 | 1.28 | 0.61–2.35 | 10 | −0.07 | −0.08 | 1.03 | 1.36 | 0.99–1.81 | 3.09 | 3.38 | 2.80–4.04 | |
| Squamous cell carcinoma (SCC) | 0.79 | 1.13 | 0.79–1.56 | 36 | −0.10 | −0.12 | 0.89 | 1.25 | 1.07–1.46 | 4.22 | 2.86 | 2.62–3.11 | |
| Age at diagnosis of LC | |||||||||||||
| 30 - 49 | 1.57 | 2.20 | 0.06–12.27 | x | −0.07 | −0.04 | 1.64 | 2.24 | 0.73–5.22 | 26.40 | 22.55 | 17.33–28.86 | |
| 50 - 59 | 1.40 | 1.94 | 1.00–3.39 | 12 | −0.21 | −0.26 | 1.61 | 2.20 | 1.70–2.81 | 10.10 | 8.01 | 7.14–8.95 | |
| 60 - 69 | 0.84 | 1.16 | 0.80–1.61 | 34 | −0.32 | −0.41 | 1.16 | 1.57 | 1.37–1.80 | 4.79 | 3.74 | 3.46–4.03 | |
| 70 - 79 | 0.63 | 0.86 | 0.59–1.21 | 32 | 0.10 | 0.15 | 0.53 | 0.71 | 0.58–0.86 | 2.73 | 2.16 | 1.99–2.35 | |
| 80+ | 0.10 | 0.13 | 0.00–0.73 | x | −0.10 | −0.12 | 0.20 | 0.25 | 0.10–0.51 | 1.60 | 1.26 | 1.06–1.49 | |
| Year of diagnosis of LC | |||||||||||||
| 2002 - 2005 | 0.50 | 0.68 | 0.36–1.16 | 13 | −0.29 | −0.37 | 0.79 | 1.05 | 0.89–1.22 | 3.71 | 2.92 | 2.72–3.13 | |
| 2006 - 2009 | 0.87 | 1.17 | 0.86–1.56 | 47 | −0.07 | −0.09 | 0.94 | 1.26 | 1.07–1.48 | 4.07 | 3.15 | 2.92–3.40 | |
| 2010 - 2013 | 0.67 | 0.92 | 0.56–1.42 | 20 | −0.17 | −0.23 | 0.84 | 1.15 | 0.90–1.44 | 3.30 | 2.59 | 2.30–2.91 | |
| Notes: The six included registries are Brandenburg 2007 to 2014, Bremen 2004 to 2014, Hamburg 2008 to 2014, Mecklenburg-Western Pomerania 2003 to 2011, Saarland 2002 to 2011 and Saxony 2005 to 2014. OSIR1 number of cases observed in the data for SIR1raw; OSIR2 number of cases observed in the data for SIR2sub, ZfKD data OSIR1 = OSIR2; SEER Surveillance, Epidemiology, and End Results Program; SIR standardized incidence ratio; SIR1raw unadjusted SIR using age-, sex-, region-, period-specific reference rates; SIR2sub histological subtype-specific SIR using age-, sex-, region-, period- and histology-specific reference rates and excluding same-histology group SPLC from observed and expected; SIR3IARC unadjusted SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR1raw = SIR3IARC; SIR4subIARC histological subtype-specific SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR2sub = SIR4subIARC; SPLC second primary lung cancer; x censored counts of observed smaller than 5 for data privacy reasons; ZfKD German Centre for Cancer Registry Data |
|||||||||||||
#save table
supp_tab_sensa_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_sensa.png"),
vwidth = 1450, expand = 30
)Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
supp_tab_sensa_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_sensa.rtf")
)Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous x-axis
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
#first overall and LC hist subtype results
sensb_tab3_pt1 <- sensb_res_sum_sir %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "t_sublungiarcgroup.1",
break_value = t_sublungiarcgroup.1) %>%
select(-t_sublungiarcgroup.1) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")
#second by age_group results
sensb_tab3_pt2 <- sensb_res_sum_sir_byage %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "p_agefcgroup",
break_value = p_agefcgroup) %>%
select(-t_sublungiarcgroup.1, -p_agefcgroup, -pyar) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")
#third by year_group results
sensb_tab3_pt3 <- sensb_res_sum_sir_byyear %>%
filter(fu_time_sort == 999 & str_detect(t_site, "Lung and Bronchus")) %>%
arrange(desc(registry)) %>%
mutate(t_site = "SPLC",
break_var = "p_yearfcgroup",
break_value = p_yearfcgroup) %>%
select(-t_sublungiarcgroup.1, -p_yearfcgroup, -pyar) %>%
pivot_wider(names_from = c(registry, method),
values_from = tidyselect::all_of(c("sir", "sir_lci", "sir_uci", "observed", "expected")),
names_glue = "{registry}.{method}.{.value}")
sensb_tab3 <- sensb_tab3_pt1 %>%
bind_rows(sensb_tab3_pt2) %>%
bind_rows(sensb_tab3_pt3) %>%
mutate(zfkd.plot = zfkd.sir1_raw.sir,
seer.plot = seer.sir1_raw.sir) %>%
#add columns of US main analysis for reference
bind_cols({tab3 %>% select(maseer.sir1_raw.sir = seer.sir1_raw.sir, maseer.sir2_sub.sir = seer.sir2_sub.sir, maseer.sir2_sub.sir_lci = seer.sir2_sub.sir_lci, maseer.sir2_sub.sir_uci = seer.sir2_sub.sir_uci)}) %>%
#calculate deltas
mutate(diff.sir1_raw.diff = seer.sir1_raw.sir - maseer.sir1_raw.sir,
diff.sir2_sub.diff = seer.sir2_sub.sir - maseer.sir2_sub.sir,
.before = maseer.sir1_raw.sir)sensb_tab3_title <- md("S10. Table: Sensitivity analysis B – Risk for SPLC using unadjusted and histology-specific SIR method <br> [SEER restricted to White population]")
sensb_tab3_subtitle <- "Comparing results for Germany (IARC/IACR MP rules) and United States (Sensitivity dataset - SEER MP rules)"
sensb_tab3_source_note <- md(paste0(
"Notes: ",
"This sensitivity analysis replicates Table 3, with SEER data restricted to White population only. <br>",
"O<sub>SIR1</sub> number of cases observed in the data for SIR1<sub>raw</sub>; ",
"O<sub>SIR2</sub> number of cases observed in the data for SIR2<sub>sub</sub>, ZfKD data O<sub>SIR1</sub> = O<sub>SIR2</sub>; ",
"SEER Surveillance, Epidemiology, and End Results Program; ",
if(en_gb){"SIR standardised incidence ratio; "}else{"SIR standardized incidence ratio; "},
"SIR1<sub>raw</sub> unadjusted SIR using age-, sex-, region-, period-specific reference rates; ",
"SIR2<sub>sub</sub> histological subtype-specific SIR using age-, sex-, region-, period- and histology-specific reference rates and excluding same-histology group SPLC from observed and expected; ",
"SIR3<sub>IARC</sub> unadjusted SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR1<sub>raw</sub> = SIR3<sub>IARC</sub>; ",
"SIR4<sub>subIARC</sub> histological subtype-specific SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR2<sub>sub</sub> = SIR4<sub>subIARC</sub>; ",
"SPLC second primary lung cancer; ",
"x censored counts of observed smaller than 5 for data privacy reasons; ",
"ZfKD German Centre for Cancer Registry Data"))
supp_tab_sensb_gt <- sensb_tab3 %>%
gt() %>%
cols_hide(c(any_of(c("t_site", "fu_time", "fu_time_sort",
"sex", "break_var")),
ends_with(c("uci", "expected")),
ends_with(c("sir3_iarc.observed", "sir4_subiarc.observed")),
ends_with(c("sir1_raw.sir_lci", "sir3_iarc.sir_lci", "sir4_subiarc.sir_lci")),
contains(c("zfkd.sir3", "zfkd.sir4", "zfkd.sir1_raw.observed")),
)) %>%
#make header
gt::tab_header(
title = sensb_tab3_title,
subtitle = sensb_tab3_subtitle) %>%
#rename columns
gt::cols_label(
contains("break_var") ~ "",
contains("break_value") ~ "",
contains("plot") ~ "",
ends_with(".sir_lci") ~ md("95% CI<sub>SIR2</sub>"),
ends_with(".expected") ~ "E",
ends_with("sir1_raw.observed") ~ md("O<sub>SIR1</sub>"),
ends_with("sir2_sub.observed") ~ md("O<sub>SIR2</sub>"),
ends_with(".sir1_raw.sir") ~ md("SIR1<sub>raw</sub>"),
ends_with(".sir2_sub.sir") ~ md("**SIR2<sub>sub</sub>**"),
ends_with(".sir3_iarc.sir") ~ md("SIR3<sub>IARC</sub>"),
ends_with(".sir4_subiarc.sir") ~ md("SIR4<sub>subIARC</sub>"),
ends_with("diff.sir1_raw.diff") ~ md("Δ SIR1<sub>raw</sub>"),
ends_with("diff.sir2_sub.diff") ~ md("**Δ SIR2<sub>sub</sub>**")
) %>%
#make col groups (spanner)
tab_spanner(
label = md("**Germany**<br>(Analysis dataset - IARC/IACR MP rules)"),
columns = c(zfkd.sir1_raw.sir,
zfkd.sir2_sub.sir,
zfkd.sir2_sub.sir_lci,
zfkd.sir2_sub.observed,
zfkd.plot),
id = "german_spanner"
) %>%
tab_spanner(
label = md("**United States (White)**<br>(Sensitivity dataset - SEER MP rules)"),
columns = c(seer.sir1_raw.sir,
seer.sir2_sub.sir,
seer.sir2_sub.sir_lci,
seer.sir3_iarc.sir,
seer.sir4_subiarc.sir,
seer.sir1_raw.observed,
seer.sir2_sub.observed,
seer.plot),
id = "us_spanner"
) %>%
tab_spanner(
label = md("**Difference to main analysis**<br>(US White - All races)"),
columns = c(diff.sir1_raw.diff,
diff.sir2_sub.diff),
id = "diff_spanner"
) %>%
tab_spanner(
label = md("**United States (All races)**<br>(Validation dataset - SEER MP rules)"),
columns = c(maseer.sir1_raw.sir,
maseer.sir2_sub.sir,
maseer.sir2_sub.sir_lci
),
id = "maus_spanner"
) %>%
gt::rows_add(sex = "female_header", .before = 1) %>%
gt::rows_add(sex = "male_header", .before = 1) %>%
#make row groups
gt::tab_row_group(
label = "",
rows = (sex == "female_header"),
id = "female"
) %>%
gt::tab_row_group(
label = md("**Females**"),
rows = (break_value == "Total - All lung cancers" & sex == "Female"),
id = "female_tot"
) %>%
gt::tab_row_group(
label = "Histology of LC",
rows = (break_var == "t_sublungiarcgroup.1" & break_value != "Total - All lung cancers" & sex == "Female"),
id = "female_sub"
) %>%
gt::tab_row_group(
label = "Age at diagnosis of LC",
rows = (break_var == "p_agefcgroup" & sex == "Female"),
id = "female_age"
) %>%
gt::tab_row_group(
label = "Year of diagnosis of LC",
rows = (break_var == "p_yearfcgroup" & sex == "Female"),
id = "female_year"
) %>%
#make row groups
gt::tab_row_group(
label = "",
rows = (sex == "male_header"),
id = "male"
) %>%
gt::tab_row_group(
label = md("**Males**"),
rows = (break_value == "Total - All lung cancers" & sex == "Male"),
id = "male_tot"
) %>%
gt::tab_row_group(
label = "Histology of LC",
rows = (break_var == "t_sublungiarcgroup.1" & break_value != "Total - All lung cancers" & sex == "Male"),
id = "male_sub"
) %>%
gt::tab_row_group(
label = "Age at diagnosis of LC",
rows = (break_var == "p_agefcgroup" & sex == "Male"),
id = "male_age"
) %>%
gt::tab_row_group(
label = "Year of diagnosis of LC",
rows = (break_var == "p_yearfcgroup" & sex == "Male"),
id = "male_year"
) %>%
row_group_order(groups = c("female", "female_tot", "female_sub", "female_age", "female_year",
"male", "male_tot", "male_sub", "male_age", "male_year")) %>%
#column formatting
gt::fmt_number(
columns = contains(c("pyar", "observed", "n_base")),
decimals = 0
) %>%
gt::fmt_number(
columns = contains(c("expected")),
decimals = 1
) %>%
gt::fmt_number(
columns = ends_with(c(".sir", ".sir_lci", ".sir_uci", ".diff")),
decimals = 2
) %>%
gt::sub_missing(
columns = everything(),
missing_text = ""
) %>%
#censor small values
sub_small_vals(
columns = zfkd.sir2_sub.observed,
rows = everything(),
threshold = 5,
small_pattern = "x") %>%
cols_merge_range(
col_begin = zfkd.sir2_sub.sir_lci,
col_end = zfkd.sir2_sub.sir_uci
) %>%
cols_merge_range(
col_begin = seer.sir2_sub.sir_lci,
col_end = seer.sir2_sub.sir_uci,
) %>%
cols_merge_range(
col_begin = maseer.sir2_sub.sir_lci,
col_end = maseer.sir2_sub.sir_uci,
) %>%
#plotted columns
plot_gt_sircomp_dotplot(var1 = zfkd.plot, var2 = zfkd.sir2_sub.sir, var3 = seer.sir2_sub.sir,
col1 = colors_4_method[1], col2 = colors_4_method[2], col3 = colors_4_method[3],
label_x1 = x1, label_x2 = x2, label_x3 = "US",
x_min = 0.5, x_max = 10, width = 70) %>%
plot_gt_sircomp_dotplot(var1 = seer.plot, var2 = seer.sir2_sub.sir, var3 = zfkd.sir2_sub.sir,
col1 = colors_4_method[1], col2 = colors_4_method[2], col3 = colors_4_method[3],
label_x1 = x1, label_x2 = x2, label_x3 = "GER",
x_min = 0.5, x_max = 10, width = 70) %>%
tab_source_note(
source_note = sensb_tab3_source_note
) %>%
#special formatting
##make column and row group labels bold
gt::tab_style(
style = cell_text(weight = "bold"),
locations = list(
cells_body(columns = c(zfkd.sir2_sub.sir, diff.sir2_sub.diff, seer.sir2_sub.sir, maseer.sir2_sub.sir))
)
) %>%
gt:: cols_width(
break_value ~ px(240),
zfkd.sir1_raw.sir ~ px(65),
zfkd.sir2_sub.sir ~ px(65),
zfkd.sir2_sub.sir_lci ~ px(87),
zfkd.sir2_sub.observed ~ px(42),
zfkd.plot ~ px(250),
seer.sir1_raw.sir ~ px(65),
seer.sir2_sub.sir ~ px(65),
seer.sir2_sub.sir_lci ~ px(95),
seer.sir3_iarc.sir ~ px(65),
seer.sir4_subiarc.sir ~ px(85),
seer.sir1_raw.observed ~ px(50),
seer.sir2_sub.observed ~ px(50),
seer.plot ~ px(250),
diff.sir1_raw.diff ~ px(85),
diff.sir2_sub.diff ~ px(85),
maseer.sir1_raw.sir ~ px(65),
maseer.sir2_sub.sir ~ px(65),
maseer.sir2_sub.sir_lci ~ px(95)
) %>%
#global table options
gt::opt_row_striping() %>% #add alternating stripes
gt::tab_options(data_row.padding = px(3), # reduce row height
row_group.padding = px(8), # reduce row height
stub.border.width = px(20), # increase space between column stubs
row.striping.include_stub = TRUE)
#output table
supp_tab_sensb_gtWarning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
| S10. Table: Sensitivity analysis B – Risk for SPLC using unadjusted and histology-specific SIR method [SEER restricted to White population] |
||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Comparing results for Germany (IARC/IACR MP rules) and United States (Sensitivity dataset - SEER MP rules) | ||||||||||||||||||
| Germany (Analysis dataset - IARC/IACR MP rules) |
United States (White) (Sensitivity dataset - SEER MP rules) |
Difference to main analysis (US White - All races) |
United States (All races) (Validation dataset - SEER MP rules) |
|||||||||||||||
| SIR1raw | SIR2sub | 95% CISIR2 | OSIR2 | SIR1raw | SIR2sub | 95% CISIR2 | SIR3IARC | SIR4subIARC | OSIR1 | OSIR2 | Δ SIR1raw | Δ SIR2sub | SIR1raw | SIR2sub | 95% CISIR2 | |||
| Females | ||||||||||||||||||
| Total - All lung cancers | 2.14 | 2.98 | 2.53–3.49 | 154 | 5.46 | 4.30 | 4.11–4.50 | 2.48 | 3.50 | 3,328 | 1,858 | −0.06 | −0.07 | 5.52 | 4.37 | 4.18–4.56 | ||
| Histology of LC | ||||||||||||||||||
| Adenocarcinoma (AC) | 1.69 | 2.53 | 1.91–3.28 | 57 | 6.04 | 4.40 | 4.11–4.70 | 2.59 | 3.89 | 1,800 | 872 | −0.04 | −0.08 | 6.08 | 4.48 | 4.20–4.76 | ||
| Large cell carcinoma (LCC) | 0.24 | 0.29 | 0.01–1.60 | x | 4.24 | 4.23 | 3.48–5.09 | 0.33 | 0.37 | 128 | 112 | 0.20 | 0.28 | 4.04 | 3.95 | 3.27–4.73 | ||
| Other & unspecified (O&U) | 1.28 | 1.80 | 0.98–3.01 | 14 | 3.94 | 3.76 | 3.35–4.20 | 2.02 | 2.94 | 481 | 315 | −0.10 | −0.12 | 4.04 | 3.88 | 3.49–4.30 | ||
| Small cell carcinoma (SCLC) | 2.43 | 3.57 | 2.26–5.35 | 23 | 4.24 | 4.53 | 3.77–5.39 | 2.53 | 3.99 | 187 | 127 | −0.02 | 0.02 | 4.26 | 4.51 | 3.79–5.32 | ||
| Squamous cell carcinoma (SCC) | 4.35 | 5.17 | 3.94–6.67 | 59 | 6.38 | 4.54 | 4.13–4.99 | 3.25 | 3.92 | 732 | 432 | −0.12 | −0.12 | 6.50 | 4.66 | 4.26–5.09 | ||
| Age at diagnosis of LC | ||||||||||||||||||
| 30 - 49 | 4.20 | 6.39 | 2.57–13.17 | 7 | 38.65 | 32.92 | 26.29–40.70 | 16.32 | 25.68 | 157 | 85 | 0.70 | 0.04 | 37.95 | 32.88 | 26.97–39.70 | ||
| 50 - 59 | 3.56 | 5.22 | 3.79–7.00 | 44 | 14.61 | 11.97 | 10.65–13.40 | 6.65 | 10.12 | 559 | 301 | 0.32 | −0.09 | 14.29 | 12.06 | 10.85–13.37 | ||
| 60 - 69 | 2.37 | 3.35 | 2.59–4.26 | 66 | 7.25 | 5.94 | 5.52–6.39 | 3.34 | 4.85 | 1,291 | 728 | 0.02 | 0.00 | 7.23 | 5.94 | 5.54–6.36 | ||
| 70 - 79 | 1.34 | 1.81 | 1.24–2.55 | 32 | 3.90 | 3.06 | 2.82–3.31 | 1.74 | 2.44 | 1,102 | 617 | 0.00 | 0.00 | 3.90 | 3.06 | 2.83–3.30 | ||
| 80+ | 0.82 | 1.06 | 0.34–2.47 | 5 | 2.07 | 1.60 | 1.33–1.90 | 0.97 | 1.29 | 219 | 127 | 0.02 | 0.05 | 2.05 | 1.55 | 1.30–1.83 | ||
| Year of diagnosis of LC | ||||||||||||||||||
| 2002 - 2005 | 2.27 | 3.07 | 2.31–3.99 | 55 | 5.62 | 4.39 | 4.10–4.71 | 2.59 | 3.61 | 1,436 | 804 | −0.09 | −0.05 | 5.71 | 4.44 | 4.15–4.74 | ||
| 2006 - 2009 | 1.91 | 2.66 | 1.99–3.48 | 53 | 5.58 | 4.42 | 4.10–4.75 | 2.56 | 3.62 | 1,289 | 722 | −0.03 | −0.06 | 5.61 | 4.48 | 4.18–4.80 | ||
| 2010 - 2013 | 2.31 | 3.33 | 2.44–4.44 | 46 | 4.92 | 3.89 | 3.48–4.33 | 2.12 | 3.04 | 603 | 332 | −0.08 | −0.13 | 5.00 | 4.02 | 3.63–4.44 | ||
| Males | ||||||||||||||||||
| Total - All lung cancers | 0.85 | 1.15 | 1.03–1.27 | 388 | 3.79 | 2.91 | 2.77–3.07 | 1.71 | 2.33 | 2,651 | 1,498 | 0.02 | −0.03 | 3.77 | 2.94 | 2.81–3.08 | ||
| Histology of LC | ||||||||||||||||||
| Adenocarcinoma (AC) | 0.93 | 1.22 | 1.02–1.45 | 132 | 4.21 | 3.08 | 2.84–3.34 | 1.97 | 2.77 | 1,136 | 591 | 0.02 | −0.05 | 4.19 | 3.13 | 2.91–3.37 | ||
| Large cell carcinoma (LCC) | 0.04 | 0.04 | 0.00–0.24 | x | 2.68 | 2.78 | 2.26–3.39 | 0.27 | 0.31 | 108 | 98 | −0.11 | −0.14 | 2.79 | 2.92 | 2.42–3.48 | ||
| Other & unspecified (O&U) | 0.80 | 1.04 | 0.74–1.41 | 40 | 2.68 | 2.55 | 2.24–2.89 | 1.35 | 1.87 | 351 | 241 | 0.00 | −0.03 | 2.68 | 2.58 | 2.29–2.89 | ||
| Small cell carcinoma (SCLC) | 1.03 | 1.36 | 0.99–1.81 | 46 | 3.03 | 3.29 | 2.68–4.00 | 2.05 | 3.02 | 135 | 100 | −0.06 | −0.09 | 3.09 | 3.38 | 2.80–4.04 | ||
| Squamous cell carcinoma (SCC) | 0.89 | 1.25 | 1.07–1.46 | 169 | 4.29 | 2.89 | 2.63–3.16 | 1.80 | 2.38 | 921 | 468 | 0.07 | 0.03 | 4.22 | 2.86 | 2.62–3.11 | ||
| Age at diagnosis of LC | ||||||||||||||||||
| 30 - 49 | 1.64 | 2.24 | 0.73–5.22 | 5 | 29.60 | 24.40 | 18.17–32.08 | 14.74 | 20.96 | 88 | 51 | 3.20 | 1.85 | 26.40 | 22.55 | 17.33–28.86 | ||
| 50 - 59 | 1.61 | 2.20 | 1.70–2.81 | 65 | 10.80 | 8.38 | 7.36–9.50 | 4.60 | 6.44 | 441 | 244 | 0.70 | 0.37 | 10.10 | 8.01 | 7.14–8.95 | ||
| 60 - 69 | 1.16 | 1.57 | 1.37–1.80 | 207 | 4.90 | 3.77 | 3.47–4.09 | 2.17 | 3.00 | 1,023 | 568 | 0.11 | 0.03 | 4.79 | 3.74 | 3.46–4.03 | ||
| 70 - 79 | 0.53 | 0.71 | 0.58–0.86 | 104 | 2.76 | 2.16 | 1.98–2.35 | 1.30 | 1.78 | 903 | 519 | 0.03 | 0.00 | 2.73 | 2.16 | 1.99–2.35 | ||
| 80+ | 0.20 | 0.25 | 0.10–0.51 | 7 | 1.63 | 1.26 | 1.04–1.52 | 0.70 | 0.92 | 196 | 116 | 0.03 | 0.00 | 1.60 | 1.26 | 1.06–1.49 | ||
| Year of diagnosis of LC | ||||||||||||||||||
| 2002 - 2005 | 0.79 | 1.05 | 0.89–1.22 | 156 | 3.73 | 2.87 | 2.66–3.10 | 1.70 | 2.29 | 1,160 | 662 | 0.02 | −0.05 | 3.71 | 2.92 | 2.72–3.13 | ||
| 2006 - 2009 | 0.94 | 1.26 | 1.07–1.48 | 157 | 4.05 | 3.09 | 2.84–3.35 | 1.79 | 2.45 | 1,052 | 588 | −0.02 | −0.06 | 4.07 | 3.15 | 2.92–3.40 | ||
| 2010 - 2013 | 0.84 | 1.15 | 0.90–1.44 | 75 | 3.40 | 2.67 | 2.35–3.02 | 1.57 | 2.19 | 439 | 248 | 0.10 | 0.08 | 3.30 | 2.59 | 2.30–2.91 | ||
| Notes: This sensitivity analysis replicates Table 3, with SEER data restricted to White population only. OSIR1 number of cases observed in the data for SIR1raw; OSIR2 number of cases observed in the data for SIR2sub, ZfKD data OSIR1 = OSIR2; SEER Surveillance, Epidemiology, and End Results Program; SIR standardized incidence ratio; SIR1raw unadjusted SIR using age-, sex-, region-, period-specific reference rates; SIR2sub histological subtype-specific SIR using age-, sex-, region-, period- and histology-specific reference rates and excluding same-histology group SPLC from observed and expected; SIR3IARC unadjusted SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR1raw = SIR3IARC; SIR4subIARC histological subtype-specific SIR but only counting international primaries (IARC/IACR MP rules), for ZfKD data SIR2sub = SIR4subIARC; SPLC second primary lung cancer; x censored counts of observed smaller than 5 for data privacy reasons; ZfKD German Centre for Cancer Registry Data |
||||||||||||||||||
#save table
supp_tab_sensb_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_sensb.png"),
vwidth = 2050, expand = 30
)Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
supp_tab_sensb_gt %>%
gt::gtsave(
file.path(output_dir_tables, "supp_tab_sensb.rtf")
)Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_segment()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 1 rows containing missing values (`geom_text()`).
fig_max_sir <- 10
fig_sites_sir1 <- c("Lung and Bronchus")
fig_sites_sir2 <- c("Lung and Bronchus [excluding same histgroupiarc]")
fig_splot_1 <- res_sum_sir %>%
#only keep SIR1, Totals zfkd from results
filter(method == "sir1_raw" & t_sublungiarcgroup.1 == "Total - All lung cancers" &
registry == "zfkd") %>%
#make dummy facet variable
mutate(label = "SIR1 raw - Germany") %>%
#make plots
plot_sir_byfutime2(., sites_to_plot = fig_sites_sir1, facet_vars = label ~ t_site, y_lim = fig_max_sir,
timecats_to_plot = c("6-12 months", "1-3 years", "3-5 years","5-10 years", "Total")
, vlab_x_off = -0.38, vlab_y_pos = 0.5, vlab_y_diff = .1, colors_sex = colors_2_sex
)
fig_splot_2 <- res_sum_sir %>%
#only keep SIR2, Totals zfkd from results
filter(method == "sir2_sub" & t_sublungiarcgroup.1 == "Total - All lung cancers" &
registry == "zfkd") %>%
#make dummy facet variable
mutate(label = "SIR2 sub - Germany") %>%
#make plots
plot_sir_byfutime2(., sites_to_plot = fig_sites_sir2, facet_vars = label ~ t_site, y_lim = fig_max_sir,
timecats_to_plot = c("6-12 months", "1-3 years", "3-5 years","5-10 years", "Total")
, vlab_x_off = -0.38, vlab_y_pos = 0.5, vlab_y_diff = .1, colors_sex = colors_2_sex
)
fig_splot_3 <- res_sum_sir %>%
#only keep SIR1, Totals seer from results
filter(method == "sir1_raw" & t_sublungiarcgroup.1 == "Total - All lung cancers" &
registry == "seer") %>%
#make dummy facet variable
mutate(label = "SIR1 raw - United States") %>%
#make plots
plot_sir_byfutime2(., sites_to_plot = fig_sites_sir1, facet_vars = label ~ t_site, y_lim = fig_max_sir,
timecats_to_plot = c("6-12 months", "1-3 years", "3-5 years","5-10 years", "Total")
, vlab_x_off = -0.38, vlab_y_pos = 0.5, vlab_y_diff = .1, colors_sex = colors_2_sex
)
fig_splot_4 <- res_sum_sir %>%
#only keep SIR2, Totals seer from results
filter(method == "sir2_sub" & t_sublungiarcgroup.1 == "Total - All lung cancers" &
registry == "seer") %>%
#make dummy facet variable
mutate(label = "SIR2 sub - United States") %>%
#make plots
plot_sir_byfutime2(., sites_to_plot = fig_sites_sir2, facet_vars = label ~ t_site, y_lim = fig_max_sir,
timecats_to_plot = c("6-12 months", "1-3 years", "3-5 years","5-10 years", "Total")
, vlab_x_off = -0.38, vlab_y_pos = 0.5, vlab_y_diff = .1, colors_sex = colors_2_sex
)#design layout for plots; # stands for empty region
supp_fig_byfutime_layout <- "
AACC
BBDD
"
supp_fig_byfutime_title <- rlang::englue('Figure S11: Relative risk for SPLC in lung cancer survivors stratified by follow-up time (n={format(nrow(d1_lung_wide), big.mark = ",")}).')
supp_fig_byfutime_subtitle <- rlang::englue("SIRs stratified by sex on the log-transformed y axis (for females in yellow and for males in blue) and stratified by follow-up time on the x axis. <br>
Top row shows values for unadjusted estimation of risk for SPLC after LC using general reference rates (SIR1raw). <br>
Bottom row shows SIR using subtype-specific reference rates excluding same-histology group (SIR2sub).")
supp_fig_byfutime_caption <- paste0("Notes: Numeric SIR values are given for total follow-up time (6 mo to 10+ years).", if(en_gb){" SIR Standardised incidence ratio; "}else{" SIR Standardized incidence ratio; "}, "length of error bar indicates 95% CI", ".")
supp_fig_byfutime <- wrap_plots(
A = fig_splot_1,
B = fig_splot_2,
C = fig_splot_3,
D = fig_splot_4,
design = supp_fig_byfutime_layout) +
#create common legend and axis labels
theme(legend.position="bottom") +
#Label Title and Caption
plot_annotation(
title = supp_fig_byfutime_title,
#title = element_text(paste0("Figure S11: Incidence of SPLC (n=", format(nrow(d1_lung_wide), big.mark = ","), ")")),
subtitle = supp_fig_byfutime_subtitle,
#subtitle = element_text("SIRs stratified by sex on the log-transformed y axis (for males in green and for females in yellow) and stratified by follow-up time on the x axis. Top row shows values for unadjusted "), #alternative str_glue, but this doesn't work with format(big.mark = ",")
caption = supp_fig_byfutime_caption,
theme = theme(plot.title = ggtext::element_markdown(size = 16),
plot.subtitle = ggtext::element_markdown(),
plot.caption = ggtext::element_markdown(hjust = 0), #left alignment of caption
)
)
#print figure
supp_fig_byfutimeWarning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
#save figure
supp_fig_byfutime %>%
ggsave(filename = file.path(output_dir_tables, "supp_fig_byfutime.png"),
width = 10, height = 8)Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
supp_fig_byfutime %>%
ggsave(filename = file.path(output_dir_tables, "supp_fig_byfutime.tiff"),
width = 10, height = 8, units = "in")Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
Warning: `position_dodge()` requires non-overlapping x intervals
Warning: Removed 2 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_text()`).
Removed 2 rows containing missing values (`geom_text()`).
testthat::test_that(
"Counted cases for LC are the same in all results files",
testthat::expect_equal(
res_n_splc_seer + res_n_splc_zfkd,
res_sum_sir1_raw %>%
filter(t_sublungiarcgroup.1 == "Total - All lung cancers" & fu_time == "Total 0.5 to Inf years") %>% summarize(n = sum(observed)) %>% pull(n)
)
)Test passed 🥇
#test in table1
#total PYAR unchanged
testthat::expect_equal(
tab1 %>% filter(category == "Sum of PYAR") %>% pull(value_seer_Male),
324648
)Workspace has been saved to file H:/Documents/Projects/SPN Data Analysis/Publications/pub_spc_sirmethods_bmed/3_output/01.an_workspace.RData.
if(save_workspace){
# list_objects <- ls(all.names = TRUE)
# #define which objects not to save
# list_objects <- list_objects[!list_objects %in% c(
# ".Random.seed" ,"add_required_packages" ,
# )]
#explicitly define objects to save
list_objects <- c("d1_lung_wide",
"res_same_hist_histgroupiarc",
"res_sum_sir",
"tab_crude_ir_ger",
"tab_crude_ir_us",
"tab1",
"tab1_gt",
"tab1_pre",
"tab2",
"tab2_gt",
"tab3",
"tab3_gt",
"rh",
"rows_ci",
"res_sum_sir_byreg",
"fig2",
"pop_methods_sum_byregion",
"unusual_hist",
"supp_tab_samehist",
"sensa_tab3",
"sensb_tab3",
"res_sensa_stats",
"supp_tab_def_gt",
"supp_tab_filter_gt",
"supp_tab_dm_gt",
"supp_tab_qual",
"supp_tab_qual_gt",
"supp_tab_subtypes_def",
"supp_tab_subtypes_def_gt",
"supp_fig_hist",
"supp_tab_samehist_gt",
"supp_tab_sensa_gt",
"supp_tab_sensb_gt",
"supp_fig_byfutime"
)
save(list = list_objects, file=output_workspace, envir = .GlobalEnv)
}